"""Generate upsert SQL scripts for users and game counts from a CSV export.

Post-patch contents of ``loader/load.py`` (commit 96e82c5, "Refactor CSV
processing and SQL generation"), restored to one statement per line.

Usage: ``python load.py [LIMIT]`` -- LIMIT caps the number of data rows read.
"""

import csv
import sys
from collections import Counter
from itertools import islice
from pathlib import Path

INPUT_FILE = "mctl.csv"
USER_OUTPUT = "users.sql"
GAME_OUTPUT = "games.sql"


def _sql_literal(value):
    """Return *value* as a single-quoted SQL string literal.

    Embedded single quotes are doubled so CSV text cannot terminate the
    literal early.
    NOTE(review): assumes the server treats backslashes literally
    (PostgreSQL ``standard_conforming_strings = on``) -- confirm.
    """
    return "'" + str(value).replace("'", "''") + "'"


def parse_row(row):
    """Convert one CSV data row into a record dict.

    Reads columns 1-4 of *row* (column 0 is ignored):
    column 1 is the user id, column 2 the last game date, and ``attempts``
    is 11 when column 3 equals ``"0"``, otherwise ``int(column 4)``.
    NOTE(review): 11 presumably marks a game that was not won -- confirm.

    Raises:
        IndexError: if the row has fewer columns than required.
        ValueError: if column 4 is needed and is not an integer.
    """
    user_id = row[1]
    last_game_date = row[2]

    attempts = 11 if row[3] == "0" else int(row[4])

    return {
        "user_id": user_id,
        "last_game_date": last_game_date,
        "attempts": attempts,
    }


def read_csv(limit=None, path=None):
    """Read the CSV file and return a list of parsed records.

    Args:
        limit: maximum number of data rows to return; ``None`` means all
            rows and ``0`` means none.
        path: CSV file to read; defaults to ``INPUT_FILE``.

    The first line is treated as a header and skipped. Blank lines are
    ignored and do not count toward *limit*. An empty file yields ``[]``.

    Raises:
        ValueError: if *limit* is negative.
    """
    if limit is not None and limit < 0:
        raise ValueError(f"limit must be non-negative, got {limit}")

    source = INPUT_FILE if path is None else path

    with open(source, newline="", encoding="utf-8") as file:
        reader = csv.reader(file)
        next(reader, None)  # skip header; tolerate a completely empty file

        # csv.reader yields [] for blank lines, which parse_row cannot index.
        rows = (row for row in reader if row)
        return [parse_row(row) for row in islice(rows, limit)]


def generate_users_sql(data):
    """Build the upsert statement for ``public.user``.

    Returns an empty string when *data* is empty, because ``VALUES`` with no
    rows is not valid SQL.

    Each user id is emitted once: PostgreSQL rejects an
    ``INSERT ... ON CONFLICT DO UPDATE`` that would touch the same row twice
    in one statement. When a user id repeats, the date from the last record
    wins, matching what row-by-row upserts would leave behind.
    """
    latest = {}
    for d in data:
        latest[d["user_id"]] = d["last_game_date"]

    if not latest:
        return ""

    values = ",\n".join(
        f"({_sql_literal(user_id)}, {_sql_literal(date)})"
        for user_id, date in latest.items()
    )

    return f"""
INSERT INTO public.user (user_id, last_game_date)
VALUES
{values}
ON CONFLICT (user_id)
DO UPDATE SET
    last_game_date = excluded.last_game_date;
"""


def generate_games_sql(data):
    """Build the upsert statement for ``public.game_count``.

    Returns an empty string when *data* is empty.

    Records sharing ``(user_id, attempts)`` are merged into one row whose
    ``game_count`` is the number of occurrences, and the conflict branch adds
    that count to the stored value. For input without repeated keys every
    count is 1, so the effect equals adding 1 per row.
    """
    # int() guarantees the unquoted attempts slot can only hold a number.
    counts = Counter((d["user_id"], int(d["attempts"])) for d in data)

    if not counts:
        return ""

    values = ",\n".join(
        f"({_sql_literal(user_id)}, {attempts}, {games})"
        for (user_id, attempts), games in counts.items()
    )

    return f"""
INSERT INTO public.game_count (user_id, attempts, game_count)
VALUES
{values}
ON CONFLICT (user_id, attempts)
DO UPDATE SET
    game_count = public.game_count.game_count + excluded.game_count;
"""


def main():
    """Read the CSV and write ``USER_OUTPUT`` and ``GAME_OUTPUT``.

    An optional first command-line argument is parsed as the row limit;
    a non-integer argument raises ``ValueError``.
    """
    limit = int(sys.argv[1]) if len(sys.argv) > 1 else None

    data = read_csv(limit)

    Path(USER_OUTPUT).write_text(generate_users_sql(data), encoding="utf-8")
    Path(GAME_OUTPUT).write_text(generate_games_sql(data), encoding="utf-8")


if __name__ == "__main__":
    main()