From 7d70610f1a096b55356ddb94ccbf57de7f7156e8 Mon Sep 17 00:00:00 2001 From: Christian Sanchez Date: Wed, 8 Oct 2025 10:54:44 -0700 Subject: [PATCH 01/23] adding retry policy and raising exceptions on error --- card_data/pipelines/defs/load/load_data.py | 32 ++++++++++++++++++---- 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/card_data/pipelines/defs/load/load_data.py b/card_data/pipelines/defs/load/load_data.py index 2da4c1ca..17b31fe4 100644 --- a/card_data/pipelines/defs/load/load_data.py +++ b/card_data/pipelines/defs/load/load_data.py @@ -1,4 +1,5 @@ import dagster as dg +from dagster import RetryPolicy, Backoff from sqlalchemy.exc import OperationalError from ..extract.extract_data import ( extract_series_data, @@ -11,7 +12,11 @@ from pathlib import Path -@dg.asset(deps=[extract_series_data], kinds={"Supabase", "Postgres"}) +@dg.asset( + deps=[extract_series_data], + kinds={"Supabase", "Postgres"}, + retry_policy=RetryPolicy(max_retries=3, delay=2, backoff=Backoff.EXPONENTIAL) +) def load_series_data() -> None: database_url: str = fetch_secret() table_name: str = "staging.series" @@ -23,7 +28,8 @@ def load_series_data() -> None: ) print(colored(" ✓", "green"), f"Data loaded into {table_name}") except OperationalError as e: - print(colored(" ✖", "red"), "Error:", e) + print(colored(" ✖", "red"), "Connection error in load_series_data():", e) + raise @dg.asset(deps=[load_series_data], kinds={"Soda"}, key_prefix=["staging"], name="series") @@ -54,7 +60,13 @@ def data_quality_check_on_series() -> None: print(result.stderr) -@dg.asset(deps=[extract_set_data], kinds={"Supabase", "Postgres"}, key_prefix=["staging"], name="sets") +@dg.asset( + deps=[extract_set_data], + kinds={"Supabase", "Postgres"}, + key_prefix=["staging"], + name="sets", + retry_policy=RetryPolicy(max_retries=3, delay=2, backoff=Backoff.EXPONENTIAL) +) def load_set_data() -> None: database_url: str = fetch_secret() table_name: str = "staging.sets" @@ -66,10 +78,17 @@ def 
load_set_data() -> None: ) print(colored(" ✓", "green"), f"Data loaded into {table_name}") except OperationalError as e: - print(colored(" ✖", "red"), "Error:", e) + print(colored(" ✖", "red"), "Connection error in load_set_data():", e) + raise -@dg.asset(deps=[create_card_dataframe], kinds={"Supabase", "Postgres"}, key_prefix=["staging"], name="cards") +@dg.asset( + deps=[create_card_dataframe], + kinds={"Supabase", "Postgres"}, + key_prefix=["staging"], + name="cards", + retry_policy=RetryPolicy(max_retries=3, delay=2, backoff=Backoff.EXPONENTIAL) +) def load_card_data() -> None: database_url: str = fetch_secret() table_name: str = "staging.cards" @@ -81,4 +100,5 @@ def load_card_data() -> None: ) print(colored(" ✓", "green"), f"Data loaded into {table_name}") except OperationalError as e: - print(colored(" ✖", "red"), "Error:", e) + print(colored(" ✖", "red"), "Connection error in load_card_data():", e) + raise \ No newline at end of file From 547dbff68a5be1866074e7420ba94d42a28baa4e Mon Sep 17 00:00:00 2001 From: Christian Sanchez Date: Wed, 8 Oct 2025 11:19:24 -0700 Subject: [PATCH 02/23] initial commit --- .../defs/extract/extract_pricing_data.py | 96 +++++++++++++++++++ 1 file changed, 96 insertions(+) create mode 100644 card_data/pipelines/defs/extract/extract_pricing_data.py diff --git a/card_data/pipelines/defs/extract/extract_pricing_data.py b/card_data/pipelines/defs/extract/extract_pricing_data.py new file mode 100644 index 00000000..94790523 --- /dev/null +++ b/card_data/pipelines/defs/extract/extract_pricing_data.py @@ -0,0 +1,96 @@ +import time +import json + +import dagster as dg +import polars as pl + +from pydantic import BaseModel, HttpUrl, ValidationError +from typing import Optional +from termcolor import colored + +import requests + +SET_PRODUCT_MATCHING = { + "sv01": "22873", + "sv02": "23120", +} + +class CardPricing(BaseModel): + product_id: int + name: str + card_number: str + market_price: float + +def is_card(item): + """Check if item 
has a 'Number' field in extendedData""" + for data_field in item["extendedData"]: + if data_field["name"] == "Number": + return True + return False + +def get_card_number(card): + """Get the card number from extendedData""" + for data_field in card["extendedData"]: + if data_field["name"] == "Number": + return data_field["value"] + return None + +def pull_product_information(set_number: str) -> pl.DataFrame | str: + url = f"https://tcgcsv.com/tcgplayer/3/{SET_PRODUCT_MATCHING[set_number]}/products" + data = requests.get(url).json() + r = requests.get(url) + + try: + validated: list[CardPricing] = [CardPricing(**item) for item in data] + print( + colored(" ✓", "green"), "Pydantic validation passed for all series entries." + ) + if r.status_code == 200: + print( + colored(" ✓", "green"), "Successful connection to API." + ) + + url_prices = f"https://tcgcsv.com/tcgplayer/3/{SET_PRODUCT_MATCHING[set_number]}/prices" + r_prices = requests.get(url_prices) + price_data = r_prices.json() + + price_dict = {price["productId"]: price["marketPrice"] + for price in price_data["results"]} + + product_id_list = [] + name_list = [] + card_number_list = [] + price_list = [] + + for card in data["results"]: + if not is_card(card): + continue + + number = get_card_number(card) + card_number_list.append(number) + + name = card["name"].partition("-")[0].strip() if "-" in card["name"] else card["name"] + name_list.append(name) + + product_id = card["productId"] + product_id_list.append(product_id) + + market_price = price_dict.get(product_id) + price_list.append(market_price) + + df = pl.DataFrame({ + "product_id": product_id_list, + "name": name_list, + "card_number": card_number_list, + "market_price": price_list, + }).with_columns(pl.col("market_price").cast(pl.Decimal(scale=2))) + + return df + + else: + return str(colored(" ✖", "red"), f"Connection error to API: {r.status_code}") + + except ValidationError as e: + print(colored(" ✖", "red"), "Pydantic validation failed.") + 
print(e) + raise From eb9e699255c369240e44af255637167da193a031 Mon Sep 17 00:00:00 2001 From: Christian Sanchez Date: Wed, 8 Oct 2025 15:53:36 -0700 Subject: [PATCH 03/23] successfully returning dataframe in Dagster --- .../defs/extract/extract_pricing_data.py | 126 +++++++++--------- card_data/tcg_pricing.py | 70 ---------- 2 files changed, 66 insertions(+), 130 deletions(-) delete mode 100644 card_data/tcg_pricing.py diff --git a/card_data/pipelines/defs/extract/extract_pricing_data.py b/card_data/pipelines/defs/extract/extract_pricing_data.py index 94790523..f22b6676 100644 --- a/card_data/pipelines/defs/extract/extract_pricing_data.py +++ b/card_data/pipelines/defs/extract/extract_pricing_data.py @@ -1,96 +1,102 @@ -import time -import json +from typing import Optional import dagster as dg import polars as pl - -from pydantic import BaseModel, HttpUrl, ValidationError -from typing import Optional +import requests +from pydantic import BaseModel, ValidationError from termcolor import colored -import requests SET_PRODUCT_MATCHING = { "sv01": "22873", "sv02": "23120", } + class CardPricing(BaseModel): product_id: int name: str card_number: str - market_price: float + market_price: Optional[float] = None + -def is_card(item): +def is_card(item: dict) -> bool: """Check if item has a 'Number' field in extendedData""" - for data_field in item["extendedData"]: - if data_field["name"] == "Number": - return True - return False + return any( + data_field.get("name") == "Number" + for data_field in item.get("extendedData", []) + ) + -def get_card_number(card): +def get_card_number(card: dict) -> Optional[str]: """Get the card number from extendedData""" - for data_field in card["extendedData"]: - if data_field["name"] == "Number": - return data_field["value"] + for data_field in card.get("extendedData", []): + if data_field.get("name") == "Number": + return data_field.get("value") return None -def pull_product_information(set_number: str) -> pl.DataFrame | str: - url = 
f"https://tcgcsv.com/tcgplayer/3/{SET_PRODUCT_MATCHING[set_number]}/products" - data = requests.get(url).json() - r = requests.get(url) - - try: - validated: list[CardPricing] = [CardPricing(**item) for item in data] - print( - colored(" ✓", "green"), "Pydantic validation passed for all series entries." - ) - if r.status_code == 200: - print( - colored(" ✓", "green"), "Successful connection to API." - ) - - url_prices = f"https://tcgcsv.com/tcgplayer/3/{SET_PRODUCT_MATCHING[set_number]}/prices" - r_prices = requests.get(url_prices) - price_data = r_prices.json() - price_dict = {price["productId"]: price["marketPrice"] - for price in price_data["results"]} +def extract_card_name(full_name: str) -> str: + """Extract clean card name, removing variant information after dash""" + return full_name.partition("-")[0].strip() if "-" in full_name else full_name - product_id_list = [] - name_list = [] - card_number_list = [] - price_list = [] - for card in data["results"]: - if not is_card(card): - continue +def pull_product_information(set_number: str) -> pl.DataFrame: + """Pull product and pricing information for a given set number.""" - number = get_card_number(card) - card_number_list.append(number) + print(colored(" →", "blue"), f"Processing set: {set_number}") - name = card["name"].partition("-")[0].strip() if "-" in card["name"] else card["name"] - name_list.append(name) + product_id = SET_PRODUCT_MATCHING[set_number] - product_id = card["productId"] - product_id_list.append(product_id) + # Fetch product data + products_url = (f"https://tcgcsv.com/tcgplayer/3/{product_id}/products") + products_data = requests.get(products_url, timeout=30).json() - market_price = price_dict.get(product_id) - price_list.append(market_price) + # Fetch pricing data + prices_url = (f"https://tcgcsv.com/tcgplayer/3/{product_id}/prices") + prices_data = requests.get(prices_url, timeout=30).json() - df = pl.DataFrame({ - "product_id": product_id_list, - "name": name_list, - "card_number": 
card_number_list, - "market_price": price_list, - }).with_columns(pl.col("market_price").cast(pl.Decimal(scale=2))) + price_dict = { + price["productId"]: price.get("marketPrice") + for price in prices_data.get("results", []) + } - return df + cards_data = [] + for card in products_data.get("results", []): + if not is_card(card): + continue - else: - return str(colored(" ✖", "red"), f"Connection error to API: {r.status_code}") + card_info = { + "product_id": card["productId"], + "name": extract_card_name(card["name"]), + "card_number": get_card_number(card), + "market_price": price_dict.get(card["productId"]), + } + cards_data.append(card_info) + # Pydantic validation + try: + validated: list[CardPricing] = [CardPricing(**card) for card in cards_data] + print( + colored(" ✓", "green"), + f"Pydantic validation passed for {len(validated)} cards.", + ) except ValidationError as e: print(colored(" ✖", "red"), "Pydantic validation failed.") print(e) raise + + df_data = [card.model_dump(mode="json") for card in validated] + return pl.DataFrame(df_data) + + +@dg.asset(kinds={"API", "Polars", "Pydantic"}) +def build_dataframe() -> pl.DataFrame: + all_cards = [] + for set_number in SET_PRODUCT_MATCHING.keys(): + df = pull_product_information(set_number) + all_cards.append(df) + + concatenated = pl.concat(all_cards) + print(concatenated) + return concatenated diff --git a/card_data/tcg_pricing.py b/card_data/tcg_pricing.py deleted file mode 100644 index 54f1b1f3..00000000 --- a/card_data/tcg_pricing.py +++ /dev/null @@ -1,70 +0,0 @@ -import requests -import polars as pl - -pl.Config(tbl_rows=-1) - -SET_PRODUCT_MATCHING = { - "sv01": "22873" -} - -def is_card(item): - """Check if item has a 'Number' field in extendedData""" - for data_field in item["extendedData"]: - if data_field["name"] == "Number": - return True - return False - -def get_card_number(card): - """Get the card number from extendedData""" - for data_field in card["extendedData"]: - if data_field["name"] == 
"Number": - return data_field["value"] - return None - -def pull_product_information(): - url = f"https://tcgcsv.com/tcgplayer/3/{SET_PRODUCT_MATCHING['sv01']}/products" - r = requests.get(url) - - if r.status_code != 200: - return - - data = r.json() - - url_prices = f"https://tcgcsv.com/tcgplayer/3/22873/prices" - r_prices = requests.get(url_prices) - price_data = r_prices.json() - - price_dict = {price["productId"]: price["marketPrice"] - for price in price_data["results"]} - - product_id_list = [] - name_list = [] - card_number_list = [] - price_list = [] - - for card in data["results"]: - if not is_card(card): - continue - - number = get_card_number(card) - card_number_list.append(number) - - name = card["name"].partition("-")[0].strip() if "-" in card["name"] else card["name"] - name_list.append(name) - - product_id = card["productId"] - product_id_list.append(product_id) - - market_price = price_dict.get(product_id) - price_list.append(market_price) - - df = pl.DataFrame({ - "product_id": product_id_list, - "name": name_list, - "card_number": card_number_list, - "market_price": price_list, - }).with_columns(pl.col("market_price").cast(pl.Decimal(scale=2))) - - print(df.sort("card_number")) - -pull_product_information() From b69eaaa5c245681dd28631359a96a721c0c457ab Mon Sep 17 00:00:00 2001 From: Christian Sanchez Date: Wed, 8 Oct 2025 16:12:09 -0700 Subject: [PATCH 04/23] dropping constraints and adding run_query --- .../macros/create_relationships.sql | 33 ++++++++++++------- 1 file changed, 22 insertions(+), 11 deletions(-) diff --git a/card_data/pipelines/poke_cli_dbt/macros/create_relationships.sql b/card_data/pipelines/poke_cli_dbt/macros/create_relationships.sql index e7b8535b..9efef9a1 100644 --- a/card_data/pipelines/poke_cli_dbt/macros/create_relationships.sql +++ b/card_data/pipelines/poke_cli_dbt/macros/create_relationships.sql @@ -1,16 +1,27 @@ {% macro create_relationships() %} - ALTER TABLE {{ target.schema }}.series ADD CONSTRAINT pk_series 
PRIMARY KEY (id); - ALTER TABLE {{ target.schema }}.sets ADD CONSTRAINT pk_sets PRIMARY KEY (set_id); - ALTER TABLE {{ target.schema }}.cards ADD CONSTRAINT pk_cards PRIMARY KEY (id); + {{ print("Dropping existing constraints...") }} - ALTER TABLE public.sets - ADD CONSTRAINT fk_sets_series - FOREIGN KEY (series_id) - REFERENCES public.series (id); + -- Drop existing constraints if they exist (in reverse dependency order) + {% do run_query("ALTER TABLE " ~ target.schema ~ ".cards DROP CONSTRAINT IF EXISTS fk_cards_sets") %} + {% do run_query("ALTER TABLE " ~ target.schema ~ ".sets DROP CONSTRAINT IF EXISTS fk_sets_series") %} + {% do run_query("ALTER TABLE " ~ target.schema ~ ".cards DROP CONSTRAINT IF EXISTS pk_cards") %} + {% do run_query("ALTER TABLE " ~ target.schema ~ ".sets DROP CONSTRAINT IF EXISTS pk_sets") %} + {% do run_query("ALTER TABLE " ~ target.schema ~ ".series DROP CONSTRAINT IF EXISTS pk_series") %} - ALTER TABLE public.cards - ADD CONSTRAINT fk_cards_sets - FOREIGN KEY (set_id) - REFERENCES public.sets (set_id); + {{ print("Adding primary keys...") }} + -- Add primary keys + {% do run_query("ALTER TABLE " ~ target.schema ~ ".series ADD CONSTRAINT pk_series PRIMARY KEY (id)") %} + {% do run_query("ALTER TABLE " ~ target.schema ~ ".sets ADD CONSTRAINT pk_sets PRIMARY KEY (set_id)") %} + {% do run_query("ALTER TABLE " ~ target.schema ~ ".cards ADD CONSTRAINT pk_cards PRIMARY KEY (id)") %} + + {{ print("Adding foreign keys...") }} + + -- Add foreign keys + {% do run_query("ALTER TABLE " ~ target.schema ~ ".sets ADD CONSTRAINT fk_sets_series FOREIGN KEY (series_id) REFERENCES " ~ target.schema ~ ".series (id)") %} + {% do run_query("ALTER TABLE " ~ target.schema ~ ".cards ADD CONSTRAINT fk_cards_sets FOREIGN KEY (set_id) REFERENCES " ~ target.schema ~ ".sets (set_id)") %} + + {{ print("Relationships created successfully") }} + + {% do return('') %} {% endmacro %} \ No newline at end of file From 8979621f47f43034e190e0a5bc289eabcebcd19d Mon Sep 17 
00:00:00 2001 From: Christian Sanchez Date: Wed, 8 Oct 2025 16:12:38 -0700 Subject: [PATCH 05/23] updating selected columns --- card_data/pipelines/poke_cli_dbt/models/cards.sql | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/card_data/pipelines/poke_cli_dbt/models/cards.sql b/card_data/pipelines/poke_cli_dbt/models/cards.sql index dfcaad7a..de27fa1a 100644 --- a/card_data/pipelines/poke_cli_dbt/models/cards.sql +++ b/card_data/pipelines/poke_cli_dbt/models/cards.sql @@ -3,5 +3,5 @@ post_hook="{{ enable_rls() }}" ) }} -SELECT id, image, name, "localId", category, hp +SELECT id, set_id, image, name, "localId", category, hp FROM {{ source('staging', 'cards') }} \ No newline at end of file From 90ab475faa028a86d03599f5ac8bbe91a74f957c Mon Sep 17 00:00:00 2001 From: Christian Sanchez Date: Thu, 9 Oct 2025 10:55:05 -0700 Subject: [PATCH 06/23] adding name mapping for dbt models --- .../defs/transformation/transform_data.py | 26 +++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/card_data/pipelines/defs/transformation/transform_data.py b/card_data/pipelines/defs/transformation/transform_data.py index d01b185e..ab1b8000 100644 --- a/card_data/pipelines/defs/transformation/transform_data.py +++ b/card_data/pipelines/defs/transformation/transform_data.py @@ -1,10 +1,32 @@ import dagster as dg -from dagster_dbt import DbtCliResource, dbt_assets +from dagster_dbt import DbtCliResource, DagsterDbtTranslator, dbt_assets from pathlib import Path DBT_PROJECT_PATH = Path(__file__).joinpath("..", "..", "..", "poke_cli_dbt").resolve() -@dbt_assets(manifest=DBT_PROJECT_PATH / "target" / "manifest.json") +class CustomDbtTranslator(DagsterDbtTranslator): + def get_asset_key(self, dbt_resource_props): + + resource_type = dbt_resource_props["resource_type"] + name = dbt_resource_props["name"] + + if resource_type == "source": + # Map staging sources to load assets + source_mapping = { + "series": "quality_checks_series", + "sets": 
"load_set_data", + "cards": "load_card_data" + } + if name in source_mapping: + return dg.AssetKey([source_mapping[name]]) + + # For models, use default behavior + return super().get_asset_key(dbt_resource_props) + +@dbt_assets( + manifest=DBT_PROJECT_PATH / "target" / "manifest.json", + dagster_dbt_translator=CustomDbtTranslator() +) def poke_cli_dbt_assets(context: dg.AssetExecutionContext, dbt: DbtCliResource): """ dbt assets that transform staging data into final models. From ecbe298d9c0661c4c83f573ffd51f994d1dcf1ae Mon Sep 17 00:00:00 2001 From: Christian Sanchez Date: Thu, 9 Oct 2025 10:55:35 -0700 Subject: [PATCH 07/23] adding name to dagster asset --- card_data/pipelines/defs/extract/extract_pricing_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/card_data/pipelines/defs/extract/extract_pricing_data.py b/card_data/pipelines/defs/extract/extract_pricing_data.py index f22b6676..18bc97da 100644 --- a/card_data/pipelines/defs/extract/extract_pricing_data.py +++ b/card_data/pipelines/defs/extract/extract_pricing_data.py @@ -90,7 +90,7 @@ def pull_product_information(set_number: str) -> pl.DataFrame: return pl.DataFrame(df_data) -@dg.asset(kinds={"API", "Polars", "Pydantic"}) +@dg.asset(kinds={"API", "Polars", "Pydantic"}, name="build_pricing_dataframe") def build_dataframe() -> pl.DataFrame: all_cards = [] for set_number in SET_PRODUCT_MATCHING.keys(): From ecf8ad9df28ba7732e93de84d33bc8640609f249 Mon Sep 17 00:00:00 2001 From: Christian Sanchez Date: Thu, 9 Oct 2025 10:57:26 -0700 Subject: [PATCH 08/23] initial commit --- .../pipelines/defs/load/load_pricing_data.py | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 card_data/pipelines/defs/load/load_pricing_data.py diff --git a/card_data/pipelines/defs/load/load_pricing_data.py b/card_data/pipelines/defs/load/load_pricing_data.py new file mode 100644 index 00000000..d3017013 --- /dev/null +++ b/card_data/pipelines/defs/load/load_pricing_data.py @@ -0,0 +1,26 
@@ +import dagster as dg +from dagster import RetryPolicy, Backoff +from sqlalchemy.exc import OperationalError +from ..extract.extract_pricing_data import build_dataframe +from ...utils.secret_retriever import fetch_secret +from termcolor import colored + + +@dg.asset( + deps=[build_dataframe], + kinds={"Supabase", "Postgres"}, + retry_policy=RetryPolicy(max_retries=3, delay=2, backoff=Backoff.EXPONENTIAL), +) +def load_pricing_data() -> None: + database_url: str = fetch_secret() + table_name: str = "staging.pricing_data" + + df = build_dataframe() + try: + df.write_database( + table_name=table_name, connection=database_url, if_table_exists="replace" + ) + print(colored(" ✓", "green"), f"Data loaded into {table_name}") + except OperationalError as e: + print(colored(" ✖", "red"), "Connection error in load_series_data():", e) + raise From 519a3cf42dfe165bb50cb21c9c8ff3e07e1abc03 Mon Sep 17 00:00:00 2001 From: Christian Sanchez Date: Thu, 9 Oct 2025 11:21:52 -0700 Subject: [PATCH 09/23] adding profile and run instructions --- docs/Infrastructure_Guide/terraform.md | 31 +++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/docs/Infrastructure_Guide/terraform.md b/docs/Infrastructure_Guide/terraform.md index 372c515b..5c8d1d24 100644 --- a/docs/Infrastructure_Guide/terraform.md +++ b/docs/Infrastructure_Guide/terraform.md @@ -30,13 +30,42 @@ After manually creating the resources in the [AWS](aws.md) section, [terraformer Terraformer acts as a "reverse Terraform" tool where it can read from created resources on AWS. Although the output can sometimes be a bit verbose, it's an easy way to capture everything to create reproducible builds. +### Profile +Before installing Terraformer, a profile from AWS will need to be configured under `~/.aws/credentials` which is `TOML` like file. + +Configure the `terraform-user` profile from [1. 
AWS](aws.md#iam) to look like this: + +```toml +[terraform-user] +aws_access_key_id = +aws_secret_access_key = +``` + +To retrieve these values, head to IAM > Users > terraform-user > Security Credentials tab. + ### Install * [Install Guide](https://github.com/GoogleCloudPlatform/terraformer?tab=readme-ov-file#installation) -Install Terraformer: +Install Terraformer on macOS: ```bash brew install terraformer ``` +### Run + +Once Terraformer is installed, run the tool to start generating `.tf` files. + +For example, to retrieve the build for an RDS instance, run: + +```bash +terraformer import aws --regions us-west-2 --resources rds --profile terraform-user +``` + +Where: +* `--regions` is the region where the RDS instance is located. +* `--resources` is the resource type to import (here, `rds`). +* `--profile` is the `terraform-user` profile from `~/.aws/credentials`. +After running this command, Terraformer will create the `.tf` files under `~/generated`. +The path to where these will be created can be changed with the `--path-output` flag. 
\ No newline at end of file From dc4387bced660be886aff0da65a37f011f9a32ab Mon Sep 17 00:00:00 2001 From: Christian Sanchez Date: Fri, 10 Oct 2025 11:07:18 -0700 Subject: [PATCH 10/23] adding pricing_data table to dbt --- .../defs/transformation/transform_data.py | 3 ++- .../pipelines/poke_cli_dbt/models/pricing_data.sql | 7 +++++++ .../pipelines/poke_cli_dbt/models/sources.yml | 14 +++++++++++++- 3 files changed, 22 insertions(+), 2 deletions(-) create mode 100644 card_data/pipelines/poke_cli_dbt/models/pricing_data.sql diff --git a/card_data/pipelines/defs/transformation/transform_data.py b/card_data/pipelines/defs/transformation/transform_data.py index ab1b8000..b67bb473 100644 --- a/card_data/pipelines/defs/transformation/transform_data.py +++ b/card_data/pipelines/defs/transformation/transform_data.py @@ -15,7 +15,8 @@ def get_asset_key(self, dbt_resource_props): source_mapping = { "series": "quality_checks_series", "sets": "load_set_data", - "cards": "load_card_data" + "cards": "load_card_data", + "pricing_data": "load_pricing_data", } if name in source_mapping: return dg.AssetKey([source_mapping[name]]) diff --git a/card_data/pipelines/poke_cli_dbt/models/pricing_data.sql b/card_data/pipelines/poke_cli_dbt/models/pricing_data.sql new file mode 100644 index 00000000..3170a1bf --- /dev/null +++ b/card_data/pipelines/poke_cli_dbt/models/pricing_data.sql @@ -0,0 +1,7 @@ +{{ config( + materialized='table', + post_hook="{{ enable_rls() }}" +) }} + +SELECT name, card_number, market_price +FROM {{ source('staging', 'pricing_data') }} \ No newline at end of file diff --git a/card_data/pipelines/poke_cli_dbt/models/sources.yml b/card_data/pipelines/poke_cli_dbt/models/sources.yml index 4dd9b0ef..dac332ac 100644 --- a/card_data/pipelines/poke_cli_dbt/models/sources.yml +++ b/card_data/pipelines/poke_cli_dbt/models/sources.yml @@ -92,4 +92,16 @@ sources: - name: attack_2_effect description: "Second attack effect" - name: attack_2_cost - description: "Second attack energy 
cost" \ No newline at end of file + description: "Second attack energy cost" + + - name: pricing_data + description: "Card pricing data" + columns: + - name: product_id + description: "Product ID" + - name: name + description: "Card name" + - name: card_number + description: "Card number" + - name: market_price + description: "Market price" \ No newline at end of file From ca2fdb5247b5de9cebdb47880336d159b159d71c Mon Sep 17 00:00:00 2001 From: Christian Sanchez Date: Fri, 10 Oct 2025 11:12:57 -0700 Subject: [PATCH 11/23] updating soda checks --- card_data/pipelines/soda/checks.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/card_data/pipelines/soda/checks.yml b/card_data/pipelines/soda/checks.yml index 94749267..95d4b1a5 100644 --- a/card_data/pipelines/soda/checks.yml +++ b/card_data/pipelines/soda/checks.yml @@ -1,6 +1,6 @@ checks for series: # Row count validation - - row_count = 2 + - row_count = 3 # Schema validation checks - schema: From c6545e3e50d719a12666eecdefab9a01292b13f7 Mon Sep 17 00:00:00 2001 From: Christian Sanchez Date: Fri, 10 Oct 2025 15:52:06 -0700 Subject: [PATCH 12/23] updating models --- card_data/pipelines/poke_cli_dbt/models/cards.sql | 2 +- card_data/pipelines/poke_cli_dbt/models/pricing_data.sql | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/card_data/pipelines/poke_cli_dbt/models/cards.sql b/card_data/pipelines/poke_cli_dbt/models/cards.sql index de27fa1a..ef5be21b 100644 --- a/card_data/pipelines/poke_cli_dbt/models/cards.sql +++ b/card_data/pipelines/poke_cli_dbt/models/cards.sql @@ -3,5 +3,5 @@ post_hook="{{ enable_rls() }}" ) }} -SELECT id, set_id, image, name, "localId", category, hp +SELECT id, set_id, image, name, "localId", category, hp, "set_cardCount_official", set_name FROM {{ source('staging', 'cards') }} \ No newline at end of file diff --git a/card_data/pipelines/poke_cli_dbt/models/pricing_data.sql b/card_data/pipelines/poke_cli_dbt/models/pricing_data.sql index 
3170a1bf..dff35155 100644 --- a/card_data/pipelines/poke_cli_dbt/models/pricing_data.sql +++ b/card_data/pipelines/poke_cli_dbt/models/pricing_data.sql @@ -3,5 +3,5 @@ post_hook="{{ enable_rls() }}" ) }} -SELECT name, card_number, market_price +SELECT product_id, name, card_number, market_price FROM {{ source('staging', 'pricing_data') }} \ No newline at end of file From 1fc64c02e2cd57cd0ac6651961ba92baf4dfefaa Mon Sep 17 00:00:00 2001 From: Christian Sanchez Date: Sat, 11 Oct 2025 15:32:58 -0700 Subject: [PATCH 13/23] updating test outputs --- testdata/main_latest_flag.golden | 2 +- testdata/pokemon_abilities.golden | 2 ++ testdata/pokemon_defense.golden | 2 ++ testdata/pokemon_defense_ability_immunities.golden | 2 ++ testdata/pokemon_image.golden | 2 ++ testdata/pokemon_image_flag_non-valid_size.golden | 2 ++ testdata/pokemon_no_flags_dual_type.golden | 2 ++ testdata/pokemon_regional_form.golden | 2 ++ testdata/pokemon_regional_form_2.golden | 2 ++ testdata/pokemon_stats.golden | 2 ++ 10 files changed, 19 insertions(+), 1 deletion(-) diff --git a/testdata/main_latest_flag.golden b/testdata/main_latest_flag.golden index 99056778..c0f9ffd0 100644 --- a/testdata/main_latest_flag.golden +++ b/testdata/main_latest_flag.golden @@ -1,6 +1,6 @@ ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓ ┃ ┃ ┃ Latest available version: ┃ -┃ • v1.6.2 ┃ +┃ • v1.7.0 ┃ ┃ ┃ ┗━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┛ diff --git a/testdata/pokemon_abilities.golden b/testdata/pokemon_abilities.golden index e0d27a14..5eb43a9e 100644 --- a/testdata/pokemon_abilities.golden +++ b/testdata/pokemon_abilities.golden @@ -1,6 +1,7 @@ Your selected Pokémon: Metagross With four linked brains, it’s more intelligent than a supercomputer, and it uses calculations to analyze foes. + Steel Psychic @@ -8,6 +9,7 @@ supercomputer, and it uses calculations to analyze foes. 
• Weight: 550.0kg (1212.5 lbs) • Height: 1.6m (5′03″) • Evolves from: Metang +• Egg Group(s): Mineral ───────── Abilities Ability 1: Clear Body diff --git a/testdata/pokemon_defense.golden b/testdata/pokemon_defense.golden index 35648836..a7554600 100644 --- a/testdata/pokemon_defense.golden +++ b/testdata/pokemon_defense.golden @@ -1,6 +1,7 @@ Your selected Pokémon: Dragapult Apparently the Dreepy inside Dragapult’s horns eagerly look forward to being launched out at Mach speeds. + Dragon Ghost @@ -8,6 +9,7 @@ look forward to being launched out at Mach speeds. • Weight: 50.0kg (110.2 lbs) • Height: 3.0m (9′10″) • Evolves from: Drakloak +• Egg Group(s): Dragon, Indeterminate ───────────── Type Defenses Immune: Fighting, Normal diff --git a/testdata/pokemon_defense_ability_immunities.golden b/testdata/pokemon_defense_ability_immunities.golden index 1617e685..4695fd75 100644 --- a/testdata/pokemon_defense_ability_immunities.golden +++ b/testdata/pokemon_defense_ability_immunities.golden @@ -1,6 +1,7 @@ Your selected Pokémon: Gastrodon When its natural enemy attacks, it oozes purple fluid and escapes. + Water Ground @@ -8,6 +9,7 @@ escapes. • Weight: 29.9kg (65.9 lbs) • Height: 0.9m (2′11″) • Evolves from: Shellos +• Egg Group(s): Indeterminate, Water1 ───────────── Type Defenses Immune: Electric diff --git a/testdata/pokemon_image.golden b/testdata/pokemon_image.golden index 192501ff..7e713a66 100644 --- a/testdata/pokemon_image.golden +++ b/testdata/pokemon_image.golden @@ -2,6 +2,7 @@ Your selected Pokémon: Skeledirge The fiery bird changes shape when Skeledirge sings. Rumor has it that the bird was born when the fireball on Skeledirge’s head gained a soul. + Fire Ghost @@ -9,6 +10,7 @@ Skeledirge’s head gained a soul. 
• Weight: 326.5kg (719.8 lbs) • Height: 1.6m (5′03″) • Evolves from: Crocalor +• Egg Group(s): Ground ───── Image ▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀▀ diff --git a/testdata/pokemon_image_flag_non-valid_size.golden b/testdata/pokemon_image_flag_non-valid_size.golden index 4f24d0f6..7292e67b 100644 --- a/testdata/pokemon_image_flag_non-valid_size.golden +++ b/testdata/pokemon_image_flag_non-valid_size.golden @@ -1,6 +1,7 @@ Your selected Pokémon: Floatzel It floats using its well-developed flotation sac. It assists in the rescues of drowning people. + Water @@ -8,6 +9,7 @@ in the rescues of drowning people. • Weight: 33.5kg (73.9 lbs) • Height: 1.1m (3′07″) • Evolves from: Buizel +• Egg Group(s): Ground, Water1 ───── Image ╭───────────────────────────╮ diff --git a/testdata/pokemon_no_flags_dual_type.golden b/testdata/pokemon_no_flags_dual_type.golden index 078f0eb7..1355d0a5 100644 --- a/testdata/pokemon_no_flags_dual_type.golden +++ b/testdata/pokemon_no_flags_dual_type.golden @@ -1,6 +1,7 @@ Your selected Pokémon: Victini This Pokémon brings victory. It is said that Trainers with Victini always win, regardless of the type of encounter. + Psychic Fire @@ -8,3 +9,4 @@ Victini always win, regardless of the type of encounter. • Weight: 4.0kg (8.8 lbs) • Height: 0.4m (1′04″) • Basic Pokémon +• Egg Group(s): No-Eggs diff --git a/testdata/pokemon_regional_form.golden b/testdata/pokemon_regional_form.golden index 8d3af9ff..ded2cdfb 100644 --- a/testdata/pokemon_regional_form.golden +++ b/testdata/pokemon_regional_form.golden @@ -1,6 +1,7 @@ Your selected Pokémon: Exeggutor Alola Its three heads think independently. However, they are friendly and never appear to squabble. + Grass Dragon @@ -8,6 +9,7 @@ friendly and never appear to squabble. 
• Weight: 415.6kg (916.2 lbs) • Height: 10.9m (35′09″) • Evolves from: Exeggcute +• Egg Group(s): Plant ────────── Base Stats HP ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 95 diff --git a/testdata/pokemon_regional_form_2.golden b/testdata/pokemon_regional_form_2.golden index cde56f83..a5ef1c41 100644 --- a/testdata/pokemon_regional_form_2.golden +++ b/testdata/pokemon_regional_form_2.golden @@ -1,6 +1,7 @@ Your selected Pokémon: Slowking Galar It has incredible intellect and intuition. Whatever the situation, it remains calm and collected. + Poison Psychic @@ -8,3 +9,4 @@ situation, it remains calm and collected. • Weight: 79.5kg (175.3 lbs) • Height: 1.8m (5′11″) • Evolves from: Slowpoke +• Egg Group(s): Monster, Water1 diff --git a/testdata/pokemon_stats.golden b/testdata/pokemon_stats.golden index 86d40dda..493af74e 100644 --- a/testdata/pokemon_stats.golden +++ b/testdata/pokemon_stats.golden @@ -1,6 +1,7 @@ Your selected Pokémon: Toxicroak It has a poison sac at its throat. When it croaks, the stored poison is churned for greater potency. + Poison Fighting @@ -8,6 +9,7 @@ stored poison is churned for greater potency. 
• Weight: 44.4kg (97.9 lbs) • Height: 1.3m (4′03″) • Evolves from: Croagunk +• Egg Group(s): Humanshape ────────── Base Stats HP ▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇ 83 From 0bf83391053f66a2eca9634e3c10f955cab85374 Mon Sep 17 00:00:00 2001 From: Christian Sanchez Date: Sat, 11 Oct 2025 15:34:20 -0700 Subject: [PATCH 14/23] updating asset names --- .../pipelines/defs/extract/extract_data.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/card_data/pipelines/defs/extract/extract_data.py b/card_data/pipelines/defs/extract/extract_data.py index f8c9e94e..3aa7c9a4 100644 --- a/card_data/pipelines/defs/extract/extract_data.py +++ b/card_data/pipelines/defs/extract/extract_data.py @@ -42,15 +42,16 @@ def extract_series_data() -> pl.DataFrame: print(e) raise - filtered = [s.model_dump(mode="json") for s in validated if s.id in ["swsh", "sv"]] + filtered = [s.model_dump(mode="json") for s in validated if s.id in ["swsh", "sv", "me"]] return pl.DataFrame(filtered) -@dg.asset(kinds={"API", "Polars", "Pydantic"}) +@dg.asset(kinds={"API", "Polars", "Pydantic"}, name="extract_set_data") def extract_set_data() -> pl.DataFrame: url_list = [ "https://api.tcgdex.net/v2/en/series/swsh", - "https://api.tcgdex.net/v2/en/series/sv" + "https://api.tcgdex.net/v2/en/series/sv", + "https://api.tcgdex.net/v2/en/series/me", ] flat: list[dict] = [] @@ -86,11 +87,10 @@ def extract_set_data() -> pl.DataFrame: return pl.DataFrame([s.model_dump(mode="json") for s in validated]) -@dg.asset(kinds={"API"}) +@dg.asset(kinds={"API"}, name="extract_card_url_from_set_data") def extract_card_url_from_set() -> list: urls = [ - "https://api.tcgdex.net/v2/en/sets/sv01", - "https://api.tcgdex.net/v2/en/sets/sv02", + "https://api.tcgdex.net/v2/en/sets/swsh3" ] all_card_urls = [] # Initialize empty list to collect all URLs @@ -113,7 +113,7 @@ def extract_card_url_from_set() -> list: return all_card_urls -@dg.asset(deps=[extract_card_url_from_set], kinds={"API"}) 
+@dg.asset(deps=[extract_card_url_from_set], kinds={"API"}, name="extract_card_info") def extract_card_info() -> list: card_url_list = extract_card_url_from_set() cards_list = [] @@ -124,6 +124,7 @@ def extract_card_info() -> list: r.raise_for_status() data = r.json() cards_list.append(data) + # print(f"Retrieved card: {data['id']} - {data.get('name', 'Unknown')}") time.sleep(0.1) except requests.RequestException as e: print(f"Failed to fetch {url}: {e}") @@ -131,7 +132,7 @@ def extract_card_info() -> list: return cards_list -@dg.asset(deps=[extract_card_info], kinds={"Polars"}) +@dg.asset(deps=[extract_card_info], kinds={"Polars"}, name="create_card_dataframe") def create_card_dataframe() -> pl.DataFrame: cards_list = extract_card_info() From ba6f9996070ec1c1808d2e9636b15807f40ecf1b Mon Sep 17 00:00:00 2001 From: Christian Sanchez Date: Sat, 11 Oct 2025 15:36:23 -0700 Subject: [PATCH 15/23] updating soda data check asset --- card_data/pipelines/defs/load/load_data.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/card_data/pipelines/defs/load/load_data.py b/card_data/pipelines/defs/load/load_data.py index 17b31fe4..8ec9a9e0 100644 --- a/card_data/pipelines/defs/load/load_data.py +++ b/card_data/pipelines/defs/load/load_data.py @@ -15,6 +15,7 @@ @dg.asset( deps=[extract_series_data], kinds={"Supabase", "Postgres"}, + name="load_series_data", retry_policy=RetryPolicy(max_retries=3, delay=2, backoff=Backoff.EXPONENTIAL) ) def load_series_data() -> None: @@ -32,9 +33,12 @@ def load_series_data() -> None: raise -@dg.asset(deps=[load_series_data], kinds={"Soda"}, key_prefix=["staging"], name="series") +@dg.asset( + deps=[load_series_data], + kinds={"Soda"}, + name="quality_checks_series" +) def data_quality_check_on_series() -> None: - # Set working directory to where this file is located current_file_dir = Path(__file__).parent print(f"Setting cwd to: {current_file_dir}") @@ -59,12 +63,14 @@ def data_quality_check_on_series() -> 
None: if result.stderr: print(result.stderr) + if result.returncode != 0: + raise Exception(f"Soda data quality checks failed with return code {result.returncode}") + @dg.asset( deps=[extract_set_data], kinds={"Supabase", "Postgres"}, - key_prefix=["staging"], - name="sets", + name="load_set_data", retry_policy=RetryPolicy(max_retries=3, delay=2, backoff=Backoff.EXPONENTIAL) ) def load_set_data() -> None: @@ -85,8 +91,7 @@ def load_set_data() -> None: @dg.asset( deps=[create_card_dataframe], kinds={"Supabase", "Postgres"}, - key_prefix=["staging"], - name="cards", + name="load_card_data", retry_policy=RetryPolicy(max_retries=3, delay=2, backoff=Backoff.EXPONENTIAL) ) def load_card_data() -> None: From b03deb6da4714d11a3a59b32467227f1023467df Mon Sep 17 00:00:00 2001 From: Christian Sanchez Date: Sat, 11 Oct 2025 15:37:49 -0700 Subject: [PATCH 16/23] adding egg group(s) to pokemon command (#188) --- cmd/pokemon/pokemon.go | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/cmd/pokemon/pokemon.go b/cmd/pokemon/pokemon.go index c4d4a08d..f9ca07b8 100644 --- a/cmd/pokemon/pokemon.go +++ b/cmd/pokemon/pokemon.go @@ -7,6 +7,7 @@ import ( "io" "math" "os" + "sort" "strings" "github.com/charmbracelet/lipgloss" @@ -97,6 +98,18 @@ func PokemonCommand() (string, error) { } } + eggGroup := func(w io.Writer) { + var eggGroupSlice []string + + for _, entry := range pokemonSpeciesStruct.EggGroups { + capitalizedEggGroup := cases.Title(language.English).String(entry.Name) + eggGroupSlice = append(eggGroupSlice, capitalizedEggGroup) + } + + sort.Strings(eggGroupSlice) + fmt.Fprintf(w, "\n%s %s %s", styling.ColoredBullet, "Egg Group(s):", strings.Join(eggGroupSlice, ", ")) + } + typing := func(w io.Writer) { var typeBoxes []string @@ -156,20 +169,22 @@ func PokemonCommand() (string, error) { } var ( - entryOutput bytes.Buffer - typeOutput bytes.Buffer - metricsOutput bytes.Buffer - speciesOutput bytes.Buffer + entryOutput bytes.Buffer + 
eggGroupOutput bytes.Buffer + typeOutput bytes.Buffer + metricsOutput bytes.Buffer + speciesOutput bytes.Buffer ) entry(&entryOutput) + eggGroup(&eggGroupOutput) typing(&typeOutput) metrics(&metricsOutput) species(&speciesOutput) output.WriteString(fmt.Sprintf( - "Your selected Pokémon: %s\n%s%s%s%s\n", - capitalizedString, entryOutput.String(), typeOutput.String(), metricsOutput.String(), speciesOutput.String(), + "Your selected Pokémon: %s\n%s\n%s%s%s%s\n", + capitalizedString, entryOutput.String(), typeOutput.String(), metricsOutput.String(), speciesOutput.String(), eggGroupOutput.String(), )) if *imageFlag != "" || *shortImageFlag != "" { From e72daa2bac0a64b0d05d2c8ccb5ead3628d4732e Mon Sep 17 00:00:00 2001 From: Christian Sanchez Date: Sat, 11 Oct 2025 15:39:39 -0700 Subject: [PATCH 17/23] updating struct to include egg groups (#188) --- structs/structs.go | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/structs/structs.go b/structs/structs.go index df484daa..90a4e2c9 100644 --- a/structs/structs.go +++ b/structs/structs.go @@ -171,7 +171,11 @@ type PokemonJSONStruct struct { // PokemonSpeciesJSONStruct pokemon-species endpoint from API type PokemonSpeciesJSONStruct struct { - Name string `json:"name"` + Name string `json:"name"` + EggGroups []struct { + Name string `json:"name"` + URL string `json:"url"` + } `json:"egg_groups"` EvolvesFromSpecies struct { Name string `json:"name"` URL string `json:"url"` From 3ea8ee22f7b544cda4bf622bca0fecb0a0860947 Mon Sep 17 00:00:00 2001 From: Christian Sanchez Date: Sat, 11 Oct 2025 15:43:39 -0700 Subject: [PATCH 18/23] initial commit - supabase --- docs/Infrastructure_Guide/supabase.md | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 docs/Infrastructure_Guide/supabase.md diff --git a/docs/Infrastructure_Guide/supabase.md b/docs/Infrastructure_Guide/supabase.md new file mode 100644 index 00000000..72536afa --- /dev/null +++ b/docs/Infrastructure_Guide/supabase.md @@ -0,0 +1,10 @@ 
+--- +weight: 7 +--- + +# 7. Supabase + +## Create an Account + +Visit the Supabase [sign-up page](https://supabase.com/dashboard/sign-up) to create an account. +Signing in with GitHub is the easiest method. \ No newline at end of file From be66e646a93624ffa8d183e4f017c00536a465af Mon Sep 17 00:00:00 2001 From: Christian Sanchez Date: Sat, 11 Oct 2025 16:02:10 -0700 Subject: [PATCH 19/23] updating RDS instructions --- docs/Infrastructure_Guide/aws.md | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/docs/Infrastructure_Guide/aws.md b/docs/Infrastructure_Guide/aws.md index 70ce4e99..6457b204 100644 --- a/docs/Infrastructure_Guide/aws.md +++ b/docs/Infrastructure_Guide/aws.md @@ -125,13 +125,24 @@ In this project, [PostgreSQL](https://www.postgresql.org/) is the database engin The cost to maintain the database with the project's configuration options come out to ~$15.00 USD. ### Setup Instructions +_**Note:** these are the configuration options that were chosen for this project. Costs are the main driver behind these options. +Feel free to choose any other options that could be more suitable._ 1. Visit the [RDS console](https://console.aws.amazon.com/rds/home). 2. On the **dashboard**, there should be an option **Create a Database**. If not, click on **Databases** on the left menu. Then click **Create Database** in the upper-right. -3. Choose PostgreSQL -4. Choose dev/test -5. Single zone -6. Burstable class -7. t4g.micro instance -8. Change storage to 20GB \ No newline at end of file +3. Under **Engine Options**, choose PostgreSQL. +4. Under **Templates**, choose Dev/Test. If eligible, use Free tier. +5. Under **Availability and Durability**, choose Single Instance Deployment. +6. Under **Settings**, give the database a name and let AWS manage the credentials. +7. Under **Instance Configuration**, choose **Burstable Classes** and then select `t4g.micro` instance. +8. Change storage to the minimum of 20GB. +9. 
Under **Connectivity**, choose the VPC that was created in the [previous step](#vpc). + * The subnet from the VPC should be already selected. + * Choose **no** for Public Access + * Keep the **default** VPC security group. + * This project does not have a preference on **Availability Zones** and uses the auto-generated **Certificate Authority**. +10. Under **Tags**, create a new tag if desired for resource organization. +11. Under **Database Authentication**, choose _password authentication_. +12. Under **Monitoring**, choose the standard version of _Database Insights_. All other options in this section can be left as default. +13. Review the **Estimated Monthly Costs**, make any changes if necessary, then click on _create database_. \ No newline at end of file From 03e647b6c32f0b533d018e55d10ecdc703ae51c0 Mon Sep 17 00:00:00 2001 From: Christian Sanchez Date: Sat, 11 Oct 2025 16:04:19 -0700 Subject: [PATCH 20/23] adding missed step in RDS instructions --- docs/Infrastructure_Guide/aws.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docs/Infrastructure_Guide/aws.md b/docs/Infrastructure_Guide/aws.md index 6457b204..676886e4 100644 --- a/docs/Infrastructure_Guide/aws.md +++ b/docs/Infrastructure_Guide/aws.md @@ -137,7 +137,8 @@ Feel free to choose any other options that could be more suitable._ 6. Under **Settings**, give the database a name and let AWS manage the credentials. 7. Under **Instance Configuration**, choose **Burstable Classes** and then select `t4g.micro` instance. 8. Change storage to the minimum of 20GB. -9. Under **Connectivity**, choose the VPC that was created in the [previous step](#vpc). +9. Under **Connectivity**, choose to not connect to an EC2 instance. This can be done later. + * Choose the VPC that was created in the [previous step](#vpc). * The subnet from the VPC should be already selected. * Choose **no** for Public Access * Keep the **default** VPC security group.
From a8a569b65e712f5227f879a8c81e62201b97da45 Mon Sep 17 00:00:00 2001 From: Christian Sanchez Date: Sun, 12 Oct 2025 12:45:38 -0700 Subject: [PATCH 21/23] fixing typo, updating aws credentials description --- docs/Infrastructure_Guide/terraform.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docs/Infrastructure_Guide/terraform.md b/docs/Infrastructure_Guide/terraform.md index 5c8d1d24..4a4802fa 100644 --- a/docs/Infrastructure_Guide/terraform.md +++ b/docs/Infrastructure_Guide/terraform.md @@ -31,17 +31,17 @@ Terraformer acts as a "reverse Terraform" tool where it can read from created re Although the output can sometimes be a bit verbose, it's an easy way to capture everything to create reproducible builds. ### Profile -Before installing Terraformer, a profile from AWS will need to be configured under `~/.aws/credentials` which is `TOML` like file. +Before installing Terraformer, a profile from AWS will need to be configured under `~/.aws/credentials` which is an INI-like file (AWS credentials format). Configure the `terraform-user` profile from [1. 
AWS](aws.md#iam) to look like this: -```toml +```ini [terraform-user] aws_access_key_id = aws_secret_access_key = ``` -To retrieve these options, head to IAM > Users > teraform-user > Security Credentials tab +To retrieve these options, head to IAM > Users > terraform-user > Security Credentials tab ### Install * [Install Guide](https://github.com/GoogleCloudPlatform/terraformer?tab=readme-ov-file#installation) From 91c59e2ea5547c6025b3209e7840284305f2aea8 Mon Sep 17 00:00:00 2001 From: Christian Sanchez Date: Sun, 12 Oct 2025 12:46:18 -0700 Subject: [PATCH 22/23] raising error if any DataFrame from a set is empty --- card_data/pipelines/defs/extract/extract_pricing_data.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/card_data/pipelines/defs/extract/extract_pricing_data.py b/card_data/pipelines/defs/extract/extract_pricing_data.py index 18bc97da..15e68cfe 100644 --- a/card_data/pipelines/defs/extract/extract_pricing_data.py +++ b/card_data/pipelines/defs/extract/extract_pricing_data.py @@ -95,6 +95,14 @@ def build_dataframe() -> pl.DataFrame: all_cards = [] for set_number in SET_PRODUCT_MATCHING.keys(): df = pull_product_information(set_number) + + # Raise error if any DataFrame is empty + if df is None or df.shape[1] == 0 or df.is_empty(): + error_msg = f"Empty DataFrame returned for set '{set_number}'. " \ + f"Cannot proceed with drop+replace operation to avoid data loss." 
+ print(colored(" ✖", "red"), error_msg) + raise ValueError(error_msg) + all_cards.append(df) concatenated = pl.concat(all_cards) From 233816704ec6ab39a2305e7dbf4b4f58263349b9 Mon Sep 17 00:00:00 2001 From: Christian <86637723+digitalghost-dev@users.noreply.github.com> Date: Sun, 12 Oct 2025 13:46:11 -0700 Subject: [PATCH 23/23] referencing correct function name in error text Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> --- card_data/pipelines/defs/load/load_pricing_data.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/card_data/pipelines/defs/load/load_pricing_data.py b/card_data/pipelines/defs/load/load_pricing_data.py index d3017013..c9b989dd 100644 --- a/card_data/pipelines/defs/load/load_pricing_data.py +++ b/card_data/pipelines/defs/load/load_pricing_data.py @@ -22,5 +22,5 @@ def load_pricing_data() -> None: ) print(colored(" ✓", "green"), f"Data loaded into {table_name}") except OperationalError as e: - print(colored(" ✖", "red"), "Connection error in load_series_data():", e) + print(colored(" ✖", "red"), "Connection error in load_pricing_data():", e) raise