From ce4e63e07c8a5769abbff47c4b458f2e43f4548f Mon Sep 17 00:00:00 2001 From: master Date: Mon, 27 Apr 2026 15:53:32 +0800 Subject: [PATCH 01/10] =?UTF-8?q?=D0=B8=D0=BD=D1=81=D1=82=D1=80=D1=83?= =?UTF-8?q?=D0=BA=D1=86=D0=B8=D1=8F=20=D0=B4=D0=BB=D1=8F=20=D0=B7=D0=B0?= =?UTF-8?q?=D0=BF=D1=83=D1=81=D0=BA=D0=B0=20duckling?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- utils/doduo/duckling instraction | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 utils/doduo/duckling instraction diff --git a/utils/doduo/duckling instraction b/utils/doduo/duckling instraction new file mode 100644 index 0000000..27fa5eb --- /dev/null +++ b/utils/doduo/duckling instraction @@ -0,0 +1,13 @@ +# в терминале прописать команды + +sudo apt-get update +sudo apt-get install -y libgmp-dev libpcre3-dev build-essential +sudo usermod -aG docker master + +# далее + +docker run -d -p 8000:8000 --name duckling rasa/duckling + +# проверка работы сервера + +docker ps \ No newline at end of file From f4438097c76db46843d595a454c400d13a37f39c Mon Sep 17 00:00:00 2001 From: master Date: Mon, 27 Apr 2026 16:00:22 +0800 Subject: [PATCH 02/10] =?UTF-8?q?=D0=B8=D0=BD=D1=81=D1=82=D1=80=D1=83?= =?UTF-8?q?=D0=BA=D1=86=D0=B8=D1=8F=20=D0=B4=D0=BB=D1=8F=20=D0=B7=D0=B0?= =?UTF-8?q?=D0=BF=D1=83=D1=81=D0=BA=D0=B0=20duckling?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- README.md | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/README.md b/README.md index c4337b9..218b12f 100644 --- a/README.md +++ b/README.md @@ -21,3 +21,23 @@ ## Authors \#TODO + + +# Docker Setup for Duckling + +## Prerequisites +Run the following commands in your terminal to install system dependencies and add your user to the Docker group: + +```bash +# Update package list and install required dependencies +sudo apt-get update +sudo apt-get install -y libgmp-dev libpcre3-dev build-essential + +# Add user 
'master' to the docker group +sudo usermod -aG docker master + +#run docker +docker run -d -p 8000:8000 --name duckling rasa/duckling + +# Check container status +docker ps \ No newline at end of file From ea54920bc8fbe7e1d84eef68533fd8759480e6e1 Mon Sep 17 00:00:00 2001 From: AldarArmaev Date: Fri, 22 May 2026 00:45:09 +0800 Subject: [PATCH 03/10] =?UTF-8?q?=D0=94=D0=BE=D0=B1=D0=B0=D0=B2=D0=BB?= =?UTF-8?q?=D0=B5=D0=BD=20=D1=8D=D0=BA=D1=81=D0=BF=D0=B5=D1=80=D0=B8=D0=BC?= =?UTF-8?q?=D0=B5=D0=BD=D1=82=20=D1=81=20Qwen=207b=20=D0=B4=D0=BB=D1=8F=20?= =?UTF-8?q?=D0=B2=D0=BE=D0=BF=D1=80=D0=BE=D1=81-=D0=BE=D1=82=D0=B2=D0=B5?= =?UTF-8?q?=D1=82=20=D0=BF=D0=BE=20=D1=82=D0=B0=D0=B1=D0=BB=D0=B8=D1=86?= =?UTF-8?q?=D0=B5=20=D0=BD=D0=B0=20WikiTableQuestion?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- WTQ/Qwen/Untitled.ipynb | 1275 +++++++++++++++++++++++++++++++++++++++ WTQ/Qwen/config.py | 57 ++ WTQ/Qwen/server.py | 75 +++ 3 files changed, 1407 insertions(+) create mode 100644 WTQ/Qwen/Untitled.ipynb create mode 100644 WTQ/Qwen/config.py create mode 100644 WTQ/Qwen/server.py diff --git a/WTQ/Qwen/Untitled.ipynb b/WTQ/Qwen/Untitled.ipynb new file mode 100644 index 0000000..a8b094e --- /dev/null +++ b/WTQ/Qwen/Untitled.ipynb @@ -0,0 +1,1275 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 5, + "id": "b824dfad-5524-47e2-b804-6f71e0507f49", + "metadata": {}, + "outputs": [], + "source": [ + "import openai\n", + "import time\n", + "import re\n", + "import pandas as pd\n", + "\n", + "def send_message(\n", + " message, \n", + " max_tokens=1000, # Увеличил дефолт, так как таблицы и логи могут быть длинными\n", + " top_p=0.9, \n", + " temperature=0.0, # Для задач интерпретатора строго 0.0 по умолчанию\n", + " server_url=\"http://127.0.0.1:9092/v1\", \n", + " api_key=\"dummy\",\n", + " model_name='Qwen2.5-Coder-7B-Instruct', \n", + " stop=None, # Для обычных ответов стоп-слова лучше сделать опциональными\n", + " retries=3 
# Количество попыток при падении сервера\n", + "):\n", + " # Инициализируем клиент OpenAI\n", + " client = openai.OpenAI(base_url=server_url, api_key=api_key)\n", + " \n", + " model_input = [\n", + " { 'role': 'user', 'content': message}\n", + " ]\n", + " \n", + " try:\n", + " print(f\"Generating content with model: {model_name} (Temp: {temperature})\")\n", + " \n", + " response = client.chat.completions.create(\n", + " model=model_name,\n", + " messages=model_input,\n", + " temperature=temperature,\n", + " max_tokens=max_tokens,\n", + " top_p=top_p,\n", + " stop=stop\n", + " )\n", + " \n", + " return True, response.choices[0].message.content\n", + "\n", + " except Exception as e:\n", + " print(f\"\\n[ERROR] Failed to call LLM: {e}\")\n", + " \n", + " # Разбираем ответ сервера, если он есть\n", + " if hasattr(e, 'response') and e.response is not None:\n", + " try:\n", + " error_info = e.response.json() \n", + " code_value = error_info.get('error', {}).get('code', 'unknown_error')\n", + " print(f\"Код ошибки от сервера: {code_value}\")\n", + " except Exception:\n", + " print(f\"Сырой ответ сервера об ошибке: {e.response.text}\")\n", + " \n", + " # Если попытки еще остались — пробуем снова\n", + " if retries > 0:\n", + " print(f\"Waiting 6 seconds before retry... (Remaining retries: {retries})\")\n", + " time.sleep(6)\n", + " return send_message(\n", + " message=message, max_tokens=max_tokens, top_p=top_p, \n", + " temperature=temperature, server_url=server_url, api_key=api_key, \n", + " model_name=model_name, stop=stop, retries=retries-1\n", + " )\n", + " else:\n", + " print(\"All retries failed. 
Skipping.\")\n", + " return False, None\n", + "\n", + "def parse_panda_code(input_string):\n", + " # Сначала попробуем найти JSON объект с PANDA\n", + " json_pattern = r'\\{[^{}]*(?:CORRECT PANDA|PANDA)\":\\s*(.+?)(?:\\n|$)?\\}'\n", + " json_match = re.search(json_pattern, input_string, re.DOTALL)\n", + " code = None\n", + " pattern = r'\"(?:CORRECT PANDA|PANDA)\":\\s*(.+?)(?:\\n|$)'\n", + " if json_match:\n", + " code = json_match.group(1).strip()\n", + " else:\n", + " match = re.search(pattern, input_string, re.DOTALL)\n", + " if match:\n", + " code = match.group(1).strip()\n", + "\n", + " if code != None:\n", + " if code.startswith('\"') and code.endswith('\"'):\n", + " code = code[1:-1]\n", + " elif code.startswith(\"'\") and code.endswith(\"'\"):\n", + " code = code[1:-1]\n", + " \n", + " return code\n", + " \n", + " return \"\"\n" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "b78aedf9-c11e-4004-b00f-d661578c3400", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idutterancecontexttargetValue
0nt-0what was the last year where this team was a p...csv/204-csv/590.csv2004
1nt-1in what city did piotr's last 1st place finish...csv/204-csv/622.csvBangkok, Thailand
2nt-2which team won previous to crettyard?csv/204-csv/772.csvWolfe Tones
3nt-3how many more passengers flew to los angeles t...csv/203-csv/515.csv12,467
4nt-4who was the opponent in the first game of the ...csv/204-csv/495.csvDerby County
...............
14144nt-14147who came in last?csv/204-csv/433.csvJavier Díaz
14145nt-14148which album has the highest number of sales bu...csv/204-csv/949.csvVain elämää
14146nt-14149japan finished below how many countries?csv/204-csv/183.csv0
14147nt-14150how many districts have a population density o...csv/204-csv/739.csv31
14148nt-14151what entrant has the most drivers?csv/203-csv/670.csvOwen Racing Organisation
\n", + "

14149 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " id utterance \\\n", + "0 nt-0 what was the last year where this team was a p... \n", + "1 nt-1 in what city did piotr's last 1st place finish... \n", + "2 nt-2 which team won previous to crettyard? \n", + "3 nt-3 how many more passengers flew to los angeles t... \n", + "4 nt-4 who was the opponent in the first game of the ... \n", + "... ... ... \n", + "14144 nt-14147 who came in last? \n", + "14145 nt-14148 which album has the highest number of sales bu... \n", + "14146 nt-14149 japan finished below how many countries? \n", + "14147 nt-14150 how many districts have a population density o... \n", + "14148 nt-14151 what entrant has the most drivers? \n", + "\n", + " context targetValue \n", + "0 csv/204-csv/590.csv 2004 \n", + "1 csv/204-csv/622.csv Bangkok, Thailand \n", + "2 csv/204-csv/772.csv Wolfe Tones \n", + "3 csv/203-csv/515.csv 12,467 \n", + "4 csv/204-csv/495.csv Derby County \n", + "... ... ... \n", + "14144 csv/204-csv/433.csv Javier Díaz \n", + "14145 csv/204-csv/949.csv Vain elämää \n", + "14146 csv/204-csv/183.csv 0 \n", + "14147 csv/204-csv/739.csv 31 \n", + "14148 csv/203-csv/670.csv Owen Racing Organisation \n", + "\n", + "[14149 rows x 4 columns]" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train = pd.read_csv('data/data/training.tsv', sep = '\\t')\n", + "train" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "8e85b283-9458-4fcf-891a-4a34e1f2697d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
YearDivisionLeagueRegular SeasonPlayoffsOpen CupAvg. Attendance
020012USL A-League4th, WesternQuarterfinalsDid not qualify7,169
120022USL A-League2nd, Pacific1st RoundDid not qualify6,260
220032USL A-League3rd, PacificDid not qualifyDid not qualify5,871
320042USL A-League1st, WesternQuarterfinals4th Round5,628
420052USL First Division5thQuarterfinals4th Round6,028
520062USL First Division11thDid not qualify3rd Round5,575
620072USL First Division2ndSemifinals2nd Round6,851
720082USL First Division11thDid not qualify1st Round8,567
820092USL First Division1stSemifinals3rd Round9,734
920102USSF D-2 Pro League3rd, USL (3rd)Quarterfinals3rd Round10,727
\n", + "
" + ], + "text/plain": [ + " Year Division League Regular Season Playoffs \\\n", + "0 2001 2 USL A-League 4th, Western Quarterfinals \n", + "1 2002 2 USL A-League 2nd, Pacific 1st Round \n", + "2 2003 2 USL A-League 3rd, Pacific Did not qualify \n", + "3 2004 2 USL A-League 1st, Western Quarterfinals \n", + "4 2005 2 USL First Division 5th Quarterfinals \n", + "5 2006 2 USL First Division 11th Did not qualify \n", + "6 2007 2 USL First Division 2nd Semifinals \n", + "7 2008 2 USL First Division 11th Did not qualify \n", + "8 2009 2 USL First Division 1st Semifinals \n", + "9 2010 2 USSF D-2 Pro League 3rd, USL (3rd) Quarterfinals \n", + "\n", + " Open Cup Avg. Attendance \n", + "0 Did not qualify 7,169 \n", + "1 Did not qualify 6,260 \n", + "2 Did not qualify 5,871 \n", + "3 4th Round 5,628 \n", + "4 4th Round 6,028 \n", + "5 3rd Round 5,575 \n", + "6 2nd Round 6,851 \n", + "7 1st Round 8,567 \n", + "8 3rd Round 9,734 \n", + "9 3rd Round 10,727 " + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "csv = pd.read_csv('data/'+ train.context[0])\n", + "csv" + ] + }, + { + "cell_type": "code", + "execution_count": 124, + "id": "651a5fb2-4dda-4544-8282-76455880387f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Year Division League Regular Season Playoffs \\\n", + "0 2001 2 USL A-League 4th, Western Quarterfinals \n", + "1 2002 2 USL A-League 2nd, Pacific 1st Round \n", + "2 2003 2 USL A-League 3rd, Pacific Did not qualify \n", + "3 2004 2 USL A-League 1st, Western Quarterfinals \n", + "4 2005 2 USL First Division 5th Quarterfinals \n", + "5 2006 2 USL First Division 11th Did not qualify \n", + "6 2007 2 USL First Division 2nd Semifinals \n", + "7 2008 2 USL First Division 11th Did not qualify \n", + "8 2009 2 USL First Division 1st Semifinals \n", + "9 2010 2 USSF D-2 Pro League 3rd, USL (3rd) Quarterfinals \n", + "\n", + " Open Cup Avg. 
Attendance \n", + "0 Did not qualify 7,169 \n", + "1 Did not qualify 6,260 \n", + "2 Did not qualify 5,871 \n", + "3 4th Round 5,628 \n", + "4 4th Round 6,028 \n", + "5 3rd Round 5,575 \n", + "6 2nd Round 6,851 \n", + "7 1st Round 8,567 \n", + "8 3rd Round 9,734 \n", + "9 3rd Round 10,727 \n" + ] + } + ], + "source": [ + "df_table = pd.read_csv('data/' + train.context.iloc[i])\n", + "print(df_table)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "1ce82a05-c0dd-438b-83df-dc619d3f859b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Отправляем корректный запрос на локальный Qwen...\n", + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "True ```json\n", + "{\n", + " \"PANDA\": \"df.loc[df['Division'] == 2 & df['League'] == 'USL A-League', 'Year'].max()\"\n", + "}\n", + "```\n", + "\n", + "--- Ответ от модели ---\n", + "```json\n", + "{\n", + " \"PANDA\": \"df.loc[df['Division'] == 2 & df['League'] == 'USL A-League', 'Year'].max()\"\n", + "}\n", + "```\n", + "You are a Python expert specializing in pandas. You are given a question and a table. Your task is to translate the given natural language question into\n", + "a single-line pandas expression. This expression, which acts like a query, must\n", + "be valid and executable so that running the pandas expression will output the\n", + "answer to the question. Consider the following:\n", + "1. The table is represented as a pandas DataFrame named df.\n", + "2. Do not include explanations, comments, or multiline outputs.\n", + "3. Ensure the output is concise, correct, and when run, it outputs the correct\n", + "given answer, and strictly follows the Json format: {{\"PANDA\": \"\"}}\n", + "\n", + "### Table schema\n", + " Year Division League Regular Season Playoffs Open Cup Avg. 
Attendance\n", + " 2001 2 USL A-League 4th, Western Quarterfinals Did not qualify 7,169\n", + " 2002 2 USL A-League 2nd, Pacific 1st Round Did not qualify 6,260\n", + " 2003 2 USL A-League 3rd, Pacific Did not qualify Did not qualify 5,871\n", + " 2004 2 USL A-League 1st, Western Quarterfinals 4th Round 5,628\n", + " 2005 2 USL First Division 5th Quarterfinals 4th Round 6,028\n", + " 2006 2 USL First Division 11th Did not qualify 3rd Round 5,575\n", + " 2007 2 USL First Division 2nd Semifinals 2nd Round 6,851\n", + " 2008 2 USL First Division 11th Did not qualify 1st Round 8,567\n", + " 2009 2 USL First Division 1st Semifinals 3rd Round 9,734\n", + " 2010 2 USSF D-2 Pro League 3rd, USL (3rd) Quarterfinals 3rd Round 10,727\n", + "\n", + "### Query\n", + "what was the last year where this team was a part of the usl a-league?\n", + "\n", + "\n", + "\n", + "--- Извлеченный код Pandas ---\n", + "[df.loc[df['Division'] == 2 & df['League'] == 'USL A-League', 'Year'].max()]\n" + ] + } + ], + "source": [ + "import config\n", + "import importlib\n", + "import pandas as pd\n", + "\n", + "# 0. Принудительно перезагружаем конфиг\n", + "importlib.reload(config) \n", + "\n", + "# Предположим, мы берем первый пример (i = 0)\n", + "i = 0\n", + "\n", + "# ИСПРАВЛЕНИЕ: Убедитесь, что переменная `train` у вас определена выше в коде (например, train = pd.read_csv('...'))\n", + "\n", + "# 1. Достаем чистый текст вопроса\n", + "query_text = train.utterance.iloc[i]\n", + "\n", + "# 2. Читаем таблицу и превращаем ЕЁ В ТЕКСТ (в формат Markdown)\n", + "table_text = pd.read_csv('data/' + train.context.iloc[i]).to_string(index=False)\n", + "#table_text = df_table.to_markdown(index=False) \n", + "\n", + "# 3. 
Безопасная подстановка данных через .replace() вместо .format()\n", + "# Это защитит от ошибок, если в промпте есть другие фигурные скобки {}\n", + "full_message = config.system_prompt.replace(\"{table}\", table_text).replace(\"{query}\", query_text)\n", + "\n", + "print(\"Отправляем корректный запрос на локальный Qwen...\")\n", + "\n", + "# ИСПРАВЛЕНИЕ: Передаем именно full_message\n", + "success, response = send_message(\n", + " message=full_message,\n", + ")\n", + "\n", + " \n", + "print(success, response)\n", + "if success:\n", + " print(\"\\n--- Ответ от модели ---\")\n", + " print(response)\n", + " print(full_message)\n", + " parsed_code = parse_panda_code(response)\n", + " print(\"\\n--- Извлеченный код Pandas ---\")\n", + " print(f\"[{parsed_code}]\")\n", + "else:\n", + " print(\"Не удалось получить ответ от сервера.\")" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "id": "2159706c-e803-4a50-af6c-00a721ae3508", + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "import re\n", + "\n", + "def exec_pandas(json_str):\n", + " # Удаляем markdown-обертку ```json и ```\n", + " cleaned = re.sub(r'^```json\\n|\\n```$', '', json_str.strip())\n", + " return json.loads(cleaned)[\"PANDA\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "id": "77ee59ff-4cb0-46bb-a3ce-8ad01590ac47", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "Generating content 
with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n" + ] + } + ], + "source": [ + "import importlib\n", + "import config\n", + "importlib.reload(config) \n", + "\n", + "n = 10\n", + "\n", + "code = {\n", + " 'correct': '',\n", + " 're_correct': '',\n", + " 're_uncorrect': '' # исправлена опечатка\n", + "}\n", + "\n", + "for i in range(n):\n", + " query_text = train.utterance.iloc[i]\n", + " table_path = 'data/' + train.context.iloc[i]\n", + " table_text = pd.read_csv(table_path).to_string(index=False)\n", + " full_message = config.system_prompt.replace(\"{table}\", table_text).replace(\"{query}\", query_text)\n", + "\n", + " success, response = send_message(message=full_message)\n", + " \n", + " if not success:\n", + " continue # или обработка ошибки\n", + "\n", + " label = train.targetValue.iloc[i] # вынести сюда, чтобы была доступна везде\n", + "\n", + " try:\n", + " df = pd.read_csv(table_path)\n", + " result = eval(exec_pandas(response))\n", + " if result == label:\n", + " code['correct'] += \" \" + exec_pandas(response)\n", + " else:\n", + " logic_message = config.logic_prompt.replace(\"{table}\", table_text).replace(\"{query}\", 
query_text).replace(\"{label}\", str(label)).replace(\"{pandas}\", exec_pandas(response))\n", + " success2, response2 = send_message(message=logic_message)\n", + " if success2:\n", + " try:\n", + " if eval(exec_pandas(response2)) == label:\n", + " code[\"re_correct\"] += \" \" + exec_pandas(response2)\n", + " except:\n", + " code[\"re_uncorrect\"] += \" \" + exec_pandas(response2)\n", + " except Exception as e:\n", + " # label уже определена\n", + " correct_message = config.correct_prompt.replace(\"{table}\", table_text).replace(\"{query}\", query_text).replace(\"{label}\", str(label)).replace(\"{pandas}\", exec_pandas(response))\n", + " success2, response2 = send_message(message=correct_message)\n", + " if success2:\n", + " try:\n", + " if eval(exec_pandas(response2)) == label:\n", + " code[\"re_correct\"] += \" \" + exec_pandas(response2)\n", + " except:\n", + " code[\"re_uncorrect\"] += \" \" + exec_pandas(response2)" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "id": "97549381-ebfe-4cee-846b-bdd7bcb8feae", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "[0] Execution error: operation 'rand_' not supported for dtype 'str' with object of type \n", + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "[3] Execution error: unsupported operand type(s) for -: 'str' and 'str'\n", + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", 
+ "[5] Execution error: operation 'sub' not supported for dtype 'str' with dtype 'datetime64[us]'\n", + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "[6] Execution error: Expecting ',' delimiter: line 2 column 27 (char 28)\n", + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "[8] Execution error: 'Full house'\n", + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "Correct: 2, Logic: 0, Syntax: 0, Failed: 8\n", + "Total processed: 10\n" + ] + } + ], + "source": [ + "correct_count = 0\n", + "logic_count = 0\n", + "syntax_count = 0\n", + "failed_count = 0\n", + "\n", + "n = 10\n", + "\n", + "code = {\n", + " 'correct': '',\n", + " 're_correct': '',\n", + " 're_uncorrect': ''\n", + "}\n", + "\n", + "for i in range(n):\n", + " query_text = train.utterance.iloc[i]\n", + " table_path = 'data/' + train.context.iloc[i]\n", + " table_text = pd.read_csv(table_path).to_string(index=False)\n", + " full_message = config.system_prompt.replace(\"{table}\", table_text).replace(\"{query}\", query_text)\n", + "\n", + " success, response = send_message(message=full_message)\n", + " \n", + " if not success:\n", + " failed_count += 1\n", + " print(f\"[{i}] send_message failed\")\n", + " continue\n", + "\n", + " label = train.targetValue.iloc[i]\n", + " pandas_code = exec_pandas(response)\n", + " \n", + " if not pandas_code:\n", + " failed_count += 1\n", + " print(f\"[{i}] No 
pandas code extracted\")\n", + " continue\n", + "\n", + " # Первая попытка: выполнить сгенерированный код\n", + " try:\n", + " df = pd.read_csv(table_path)\n", + " result = eval(pandas_code)\n", + " \n", + " if result == label:\n", + " code['correct'] += \" \" + pandas_code\n", + " correct_count += 1\n", + " else:\n", + " # Логическая коррекция\n", + " logic_message = config.logic_prompt.replace(\"{table}\", table_text).replace(\"{query}\", query_text).replace(\"{label}\", str(label)).replace(\"{pandas}\", pandas_code)\n", + " success2, response2 = send_message(message=logic_message)\n", + " \n", + " if success2:\n", + " pandas_code2 = exec_pandas(response2)\n", + " try:\n", + " if eval(pandas_code2) == label:\n", + " code[\"re_correct\"] += \" \" + pandas_code2\n", + " logic_count += 1\n", + " else:\n", + " code[\"re_uncorrect\"] += \" \" + pandas_code2\n", + " failed_count += 1\n", + " except:\n", + " code[\"re_uncorrect\"] += \" \" + pandas_code2\n", + " failed_count += 1\n", + " else:\n", + " failed_count += 1\n", + " \n", + " except Exception as e:\n", + " # Синтаксическая коррекция (код не выполнился из-за ошибки)\n", + " print(f\"[{i}] Execution error: {e}\")\n", + " \n", + " syntax_message = config.correct_prompt.replace(\"{table}\", table_text).replace(\"{query}\", query_text).replace(\"{label}\", str(label)).replace(\"{pandas}\", pandas_code)\n", + " success2, response2 = send_message(message=syntax_message)\n", + " \n", + " if success2:\n", + " pandas_code2 = exec_pandas(response2)\n", + " try:\n", + " if eval(pandas_code2) == label:\n", + " code[\"re_correct\"] += \" \" + pandas_code2\n", + " syntax_count += 1\n", + " else:\n", + " code[\"re_uncorrect\"] += \" \" + pandas_code2\n", + " failed_count += 1\n", + " except:\n", + " code[\"re_uncorrect\"] += \" \" + pandas_code2\n", + " failed_count += 1\n", + " else:\n", + " failed_count += 1\n", + "\n", + "print(f\"Correct: {correct_count}, Logic: {logic_count}, Syntax: {syntax_count}, Failed: 
{failed_count}\")\n", + "print(f\"Total processed: {correct_count + logic_count + syntax_count + failed_count}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "id": "5565eca2-57ef-4543-bd32-af37c3e7ac68", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'correct': \" df.loc[df['Position'] == '1st', 'Venue'].iloc[-1] df['Opponent'].iloc[0]\",\n", + " 're_correct': '',\n", + " 're_uncorrect': \" df.loc[(df['Division'] == 2) & (df['League'] == 'USL A-League'), 'Year'].max() df.loc[df['Team'] == 'Wolfe Tones', 'Years won'].iloc[0] - 1 (df.loc[df['City'] == 'United States, Los Angeles', 'Passengers'].iloc[0] - df.loc[df['City'] == 'Canada, Saskatoon', 'Passengers'].iloc[0]) len(df[(df['Left office'].dt.to_period('D') - df['Took office'].dt.to_period('D')).astype(int') >= 1095]) df['Away team'].iloc[0] df.loc[df['Name in English'] == 'Lake Palas Tuzla', 'Depth'].values[0] df.loc['Full house', '4 credits'] df[(df['Position'] == df.loc[3, 'Position']) & (df['Player'] != 'Siim Ennemuist')]['Player'].tolist()\"}" + ] + }, + "execution_count": 73, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "code" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "id": "63549786-67fd-4212-91eb-eca42135a8a0", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "df.loc[df['Division'] == 2 & df['League'] == 'USL A-League', 'Year'].max()\n", + "df.loc[df['Position'] == '1st', 'Venue'].iloc[-1]\n", + "df.loc[df['Team'] == 'Crettyard', 'Years won'].iloc[0] - 1\n", + "(df.loc[df['City'] == 'United States, Los Angeles', 'Passengers'].values[0] - df.loc[df['City'] == 'Canada, Saskatoon', 'Passengers'].values[0])\n", + "df['Opponent'].iloc[0]\n", + "len(df[df['Left office'] - pd.to_datetime(df['Took office']) >= pd.Timedelta(days=1095)])\n", + "df['Away team'].iloc[0]\n", + "df.loc[df['Name in English'].isin(['Lake Tuz', 'Lake Palas Tuzla']), 'Depth'].max()\n", + 
"df.loc['Full house', '4 credits']\n", + "df[df['Position'] == df.loc[3, 'Position']]['Player'].tolist()\n" + ] + } + ], + "source": [ + "for i in range(len(code)):\n", + " current code = exec_pandas(code[i])\n", + " df = pd.read_csv('data/' + train.context.iloc[i])\n", + " target = train.targetValue.iloc[i]" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "id": "1219cf2d-55b4-4cf8-9ee0-93702e2d1ce9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{0: '```json\\n{\\n \"PANDA\": \"df.loc[df[\\'Division\\'] == 2 & df[\\'League\\'] == \\'USL A-League\\', \\'Year\\'].max()\"\\n}\\n```',\n", + " 1: '```json\\n{\\n \"PANDA\": \"df.loc[df[\\'Position\\'] == \\'1st\\', \\'Venue\\'].iloc[-1]\"\\n}\\n```',\n", + " 2: '```json\\n{\\n \"PANDA\": \"df.loc[df[\\'Team\\'] == \\'Crettyard\\', \\'Years won\\'].iloc[0] - 1\"\\n}\\n```',\n", + " 3: '```json\\n{\\n \"PANDA\": \"(df.loc[df[\\'City\\'] == \\'United States, Los Angeles\\', \\'Passengers\\'].values[0] - df.loc[df[\\'City\\'] == \\'Canada, Saskatoon\\', \\'Passengers\\'].values[0])\"\\n}\\n```',\n", + " 4: '```json\\n{\\n \"PANDA\": \"df[\\'Opponent\\'].iloc[0]\"\\n}\\n```',\n", + " 5: '```json\\n{\\n \"PANDA\": \"len(df[df[\\'Left office\\'] - pd.to_datetime(df[\\'Took office\\']) >= pd.Timedelta(days=1095)])\"\\n}\\n```',\n", + " 6: '```json\\n{\\n \"PANDA\": \"df[\\'Away team\\'].iloc[0]\"\\n}\\n```',\n", + " 7: '```json\\n{\\n \"PANDA\": \"df.loc[df[\\'Name in English\\'].isin([\\'Lake Tuz\\', \\'Lake Palas Tuzla\\']), \\'Depth\\'].max()\"\\n}\\n```',\n", + " 8: '```json\\n{\\n \"PANDA\": \"df.loc[\\'Full house\\', \\'4 credits\\']\"\\n}\\n```',\n", + " 9: '```json\\n{\\n \"PANDA\": \"df[df[\\'Position\\'] == df.loc[3, \\'Position\\']][\\'Player\\'].tolist()\"\\n}\\n```'}" + ] + }, + "execution_count": 55, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "code" + ] + }, + { + "cell_type": "code", + "execution_count": 156, + "id": 
"8c446433-f113-4c4f-a791-adac6e94a19a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "```json\n", + "{\"PANDA\": \"df[df['League'] == 'USL A-League'].index[-1].year\"}\n", + "```\n", + " Year Division League Regular Season Playoffs Open Cup Avg. Attendance\n", + " 2001 2 USL A-League 4th, Western Quarterfinals Did not qualify 7,169\n", + " 2002 2 USL A-League 2nd, Pacific 1st Round Did not qualify 6,260\n", + " 2003 2 USL A-League 3rd, Pacific Did not qualify Did not qualify 5,871\n", + " 2004 2 USL A-League 1st, Western Quarterfinals 4th Round 5,628\n", + " 2005 2 USL First Division 5th Quarterfinals 4th Round 6,028\n", + " 2006 2 USL First Division 11th Did not qualify 3rd Round 5,575\n", + " 2007 2 USL First Division 2nd Semifinals 2nd Round 6,851\n", + " 2008 2 USL First Division 11th Did not qualify 1st Round 8,567\n", + " 2009 2 USL First Division 1st Semifinals 3rd Round 9,734\n", + " 2010 2 USSF D-2 Pro League 3rd, USL (3rd) Quarterfinals 3rd Round 10,727\n", + "what was the last year where this team was a part of the usl a-league?\n", + "```json\n", + "df[df['Position'] == '1st']['Venue'].iloc[-1]\n", + "```\n", + " Year Competition Venue Position Event Notes\n", + " 2001 World Youth Championships Debrecen, Hungary 2nd 400 m 47.12\n", + " 2001 World Youth Championships Debrecen, Hungary 1st Medley relay 1:50.46\n", + " 2001 European Junior Championships Grosseto, Italy 1st 4x400 m relay 3:06.12\n", + " 2003 European Junior Championships Tampere, Finland 3rd 400 m 46.69\n", + " 2003 European Junior Championships Tampere, Finland 2nd 4x400 m relay 3:08.62\n", + " 2005 European U23 Championships Erfurt, Germany 11th (sf) 400 m 46.62\n", + " 2005 European U23 Championships Erfurt, Germany 1st 4x400 m relay 3:04.41\n", + " 2005 Universiade Izmir, Turkey 7th 400 m 46.89\n", + " 2005 Universiade Izmir, Turkey 1st 4x400 m relay 3:02.57\n", + " 2006 World Indoor Championships Moscow, Russia 2nd (h) 4x400 m relay 
3:06.10\n", + " 2006 European Championships Gothenburg, Sweden 3rd 4x400 m relay 3:01.73\n", + " 2007 European Indoor Championships Birmingham, United Kingdom 3rd 4x400 m relay 3:08.14\n", + " 2007 Universiade Bangkok, Thailand 7th 400 m 46.85\n", + " 2007 Universiade Bangkok, Thailand 1st 4x400 m relay 3:02.05\n", + " 2008 World Indoor Championships Valencia, Spain 4th 4x400 m relay 3:08.76\n", + " 2008 Olympic Games Beijing, China 7th 4x400 m relay 3:00.32\n", + " 2009 Universiade Belgrade, Serbia 2nd 4x400 m relay 3:05.69\n", + "in what city did piotr's last 1st place finish occur?\n", + "```json\n", + "df[df['County'] == 'Laois']['Team'].iloc[0]\n", + "```\n", + " Team County Wins Years won\n", + " Greystones Wicklow 1 2011\n", + "Ballymore Eustace Kildare 1 2010\n", + " Maynooth Kildare 1 2009\n", + " Ballyroan Abbey Laois 1 2008\n", + " Fingal Ravens Dublin 1 2007\n", + " Confey Kildare 1 2006\n", + " Crettyard Laois 1 2005\n", + " Wolfe Tones Meath 1 2004\n", + " Dundalk Gaels Louth 1 2003\n", + "which team won previous to crettyard?\n", + "```json\n", + "df[(df['City'] == 'Los Angeles') & (df['Ranking'] > 3)]['Passengers'].sum() - df[df['City'] == 'Saskatoon']['Passengers'].sum()\n", + "```\n", + " Rank City Passengers Ranking Airline\n", + " 1 United States, Los Angeles 14,749 NaN Alaska Airlines\n", + " 2 United States, Houston 5,465 NaN United Express\n", + " 3 Canada, Calgary 3,761 NaN Air Transat, WestJet\n", + " 4 Canada, Saskatoon 2,282 4.0 NaN\n", + " 5 Canada, Vancouver 2,103 NaN Air Transat\n", + " 6 United States, Phoenix 1,829 1.0 US Airways\n", + " 7 Canada, Toronto 1,202 1.0 Air Transat, CanJet\n", + " 8 Canada, Edmonton 110 NaN NaN\n", + " 9 United States, Oakland 107 NaN NaN\n", + "how many more passengers flew to los angeles than to saskatoon from manzanillo airport in 2013?\n", + "```pandas\n", + "df[df['Date'] == '15 August 1987']['Opponent']\n", + "```\n", + " Date Opponent Venue Result Attendance Scorers\n", + " 15 August 1987 Derby 
County Away 0–1 17,204 —\n", + " 18 August 1987 Coventry City Home 0–1 09,380 —\n", + " 22 August 1987 West Ham United Home 2–2 08,073 Harford (2)\n", + " 29 August 1987 Chelsea Away 0–3 16,075 —\n", + " 31 August 1987 Arsenal Home 1–1 08,745 Wilson (pen)\n", + " 5 September 1987 Oxford United Away 5–2 06,804 Breacker, Harford, Hill, Nwajiobi, B. Stein\n", + "12 September 1987 Everton Home 2–1 08,124 Hill, B. Stein\n", + "19 September 1987 Charlton Athletic Away 0–1 05,002 —\n", + "26 September 1987 Queens Park Rangers Away 0–2 11,175 —\n", + " 3 October 1987 Manchester United Home 1–1 09,137 Harford\n", + " 10 October 1987 Portsmouth Away 1–3 12,391 Harford (pen)\n", + " 17 October 1987 Wimbledon Home 2–0 07,018 B. Stein, Wilson\n", + " 24 October 1987 Liverpool Home 0–1 11,997 —\n", + " 7 November 1987 Newcastle United Home 4–0 07,638 Nwajiobi, B. Stein, M. Stein (2)\n", + " 14 November 1987 Sheffield Wednesday Away 2–0 16,960 Allinson, M. Stein\n", + " 21 November 1987 Tottenham Hotspur Home 2–0 10,091 Allinson (2)\n", + " 5 December 1987 Norwich City Home 1–2 07,002 B. Stein\n", + " 12 December 1987 Watford Away 1–0 12,152 Foster\n", + " 18 December 1987 Southampton Home 2–2 06,618 Harford, McDonough\n", + " 26 December 1987 Everton Away 0–2 32,128 —\n", + " 28 December 1987 Charlton Athletic Home 1–0 07,243 Wilson\n", + " 1 January 1988 Chelsea Home 3–0 08,018 Harford, B. Stein, M. Stein\n", + " 2 January 1988 West Ham United Away 1–1 16,716 M. 
Stein\n", + " 16 January 1988 Derby County Home 1–0 07,175 McDonough\n", + " 6 February 1988 Oxford United Home 7–4 08,063 Harford (2), McDonough, B.Stein, M.Stein (3)\n", + " 13 February 1988 Arsenal Away 1–2 22,612 M.Stein\n", + " 5 March 1988 Wimbledon Away 0–2 04,854 —\n", + " 15 March 1988 Coventry City Away 0–4 13,711 —\n", + " 29 March 1988 Portsmouth Home 4–1 06,740 B.Stein, M.Stein, Wilson, own goal\n", + " 2 April 1988 Newcastle United Away 0–4 20,752 —\n", + " 5 April 1988 Sheffield Wednesday Home 2–2 07,337 McDonough, B. Stein\n", + " 12 April 1988 Manchester United Away 0–3 28,830 —\n", + " 19 April 1988 Queens Park Rangers Home 2–1 06,735 Foster, Wilson (pen)\n", + " 30 April 1988 Norwich City Away 2–2 13,171 M. Stein, Wilson (pen)\n", + " 2 May 1988 Watford Home 2–1 10,409 Oldfield, Wilson (pen)\n", + " 4 May 1988 Tottenham Hotspur Away 1–2 15,437 Grimes\n", + " 7 May 1988 Southampton Away 1–1 12,722 Wilson\n", + " 9 May 1988 Liverpool Away 1–1 30,374 Oldfield\n", + " 13 May 1988 Nottingham Forest Home 1–1 09,108 Donaghy\n", + " 15 May 1988 Nottingham Forest Away 1–1 13,106 Oldfield\n", + "who was the opponent in the first game of the season?\n", + "```json\n", + "{\"PANDA\": \"len(df[df['Took office'] <= df['Left office'].shift()])\"}\n", + "```\n", + " Unnamed: 0 Name Took office Left office Party Notes/Events\n", + " 11 William McCreery March 4, 1803 March 3, 1809 Democratic Republican NaN\n", + " 12 Alexander McKim March 4, 1809 March 3, 1815 Democratic Republican NaN\n", + " 13 William Pinkney March 4, 1815 April 18, 1816 Democratic Republican Resigned to accept position as Minister Plenipotentiary to Russia\n", + " 14 Peter Little September 2, 1816 March 3, 1823 Democratic Republican NaN\n", + " 14 Peter Little March 4, 1823 March 3, 1825 Jacksonian DR NaN\n", + " 14 Peter Little March 4, 1825 March 3, 1829 Adams NaN\n", + " 15 Benjamin C. 
Howard March 4, 1829 March 3, 1833 Jacksonian NaN\n", + "how many people stayed at least 3 years in office?\n", + "```python\n", + "df['Away team'].iloc[0]\n", + "```\n", + " Tie no Home team Score Away team\n", + " 49 Dalsjöfors GoIF (WC) 1-4 Varbergs GIF (D3)\n", + " 50 Sjömarkens IF (D4) 1-4 BK Slätta Damm (D3)\n", + " 51 IF Tymer (D4) 0-3 Kållereds SK (D3)\n", + " 52 IFK Hjo (WC) 0-4 Nässjö FF (D3)\n", + " 53 Falköpings FK (D4) 2-0 Gånghesters SK (D4)\n", + " 54 Ankarsrums IS (WC) 1-2 Linköpings FF (D3)\n", + " 55 Rödsle BK (D4) 1-0 (gg) Skeninge IK (D4)\n", + " 56 Lindås BK (D4) 1-3 Hultsfreds FK (D3)\n", + " 57 Hvetlanda GIF (D4) 0-1 Åhus Horna BK (D3)\n", + " 58 Bredaryds IK (D4) 3-0 Ulricehamns IFK (D3)\n", + " 59 Hovslätts IK (D4) 0-9 Tidaholms GIF (D2)\n", + " 60 Torpa AIS (D4) 0-2 BK Zeros (D3)\n", + " 61 Fiskeby IF (WC) 2-1 (gg) Västerviks FF (D4)\n", + " 62 Gnösjö IF (D4) 1-3 Skövde AIK (D2)\n", + " 63 Sävsjö FF (D4) 5-3 Skillingaryds IS (D4)\n", + " 64 Boxholms IF (WC) 1-2 Tranås AIF (D3)\n", + " 65 LSW IF (D4) 2-1 Husqvarna FF (D2)\n", + " 66 Lessebo GoIF (D4) 0-1 Listerby IK (D4)\n", + " 67 Rörviks IF (D4) 0-2 Lunds BK (D2)\n", + " 68 Lagans AIK (D4) 0-1 Högaborgs BK (D2)\n", + " 69 IF Eksjö (D4) 1-4 Kalmar FF (D2)\n", + " 70 Limmareds IF (D4) 1-5 Växjö Norra IF (D2)\n", + " 71 Bankeryds SK (D4) 4-1 Hjulsbro IK (D2)\n", + " 72 Skultorps IF (D4) 0-2 BK Forward (D2)\n", + " 73 Gullspångs IF (D4) 0-7 Rynninge IK (D3)\n", + " 74 Skara FC (D4) 0-4 Karlslunds IF (D3)\n", + " 75 Bråtens IK (D4) 0-4 Vivalla-Lundby IF (D3)\n", + " 76 Finnerödja IF(D4) 3-1 IFK Mariestad (D4)\n", + " 77 Sköllersta IF (D4) 1-3 Hemgårdarnas BK (D4)\n", + " 78 Simonstorps IF (D4) 0-5 Nyköpings BIS (D2)\n", + " 79 Ringarums IF (D4) 1-4 Värmbols FC (D4)\n", + " 80 Dagsbergs IF (D4) 1-0 Malmköpings IF (D4)\n", + " 81 Katrineholms SK (D4) 0-2 BK Kenty (D4)\n", + " 82 Härad IF (D4) 2-3 (gg) IFK Västerås (D2)\n", + " 83 Kolsva IF (D4) 0-3 Karlstad BK (D2)\n", + " 84 Laxå IF (D4) 0-4 IF 
Sylvia (D2)\n", + " 85 Ransta IK (D4) 1-3 IFK Hallsberg (D4)\n", + " 86 Skyllbergs IK (WC) 0-4 IFK Kristinehamn (D4)\n", + " 87 Filipstads FF (D4) 3-1 Kungsörs SK (D4)\n", + " 88 Hallstahammars SK (D4) 0-7 IFK Eskilstuna (D2)\n", + " 89 BK Hird (D4) 0-5 Hargs BK (D2)\n", + " 90 Vretstorps IF (WC) 1-5 IFK Ölme (D3)\n", + " 91 Frövi IK (WC) 1-9 Skiljebo SK (D3)\n", + " 92 IF Rune (WC) 1-3 Gnesta FF (D3)\n", + " 93 Västerås BK 30 (WC) 0-8 Örebro SK Ungdom (D4)\n", + " 94 VoIF Diana (WC) 4-0 Enskede IK (D4)\n", + " 95 New Mill FF (WC) 4-2 Värtans SK (D3)\n", + " 96 Runtuna IK/Löthen (WC) 1-2 Huddinge IF (D3)\n", + "who is the first away team on the chart\n", + "```pandas\n", + "df[df['Name in Turkish'].str.contains('Tuz')]['Depth']\n", + "```\n", + " Name in English Name in Turkish Area (km2) Depth Location (districts and/or provinces)\n", + " Lake Van Van Gölü 3755 km2 451 m Van, Bitlis\n", + " Lake Tuz Tuz Gölü 1500 km2 2 m Aksaray, Ankara, Konya\n", + " Lake Beyşehir Beyşehir Gölü 656 km2 10 m Beyşehir in Konya, Isparta\n", + " Lake Eğirdir Eğirdir Gölü 482 km2 NaN Isparta\n", + " Lake İznik İznik Gölü 308 km2 NaN İznik in Bursa, Yalova\n", + " Lake Burdur Burdur Gölü 200 km2 NaN Burdur, Isparta\n", + " Lake Manyas Manyas Gölü 166 km2 NaN Balıkesir\n", + " Lake Acıgöl Acıgöl 153 km2 NaN Denizli, Afyonkarahisar\n", + " Lake Uluabat Uluabat Gölü 134 km2 1–2 m Bursa\n", + " Lake Çıldır Çıldır Gölü 115 km2 NaN Ardahan, Kars\n", + " Lake Palas Tuzla Palas Tuzla Gölü 106 km2 15 m Palas/Kayseri\n", + " Lake Akşehir Akşehir Gölü 105 km2 NaN Akşehir in Konya, Afyonkarahisar\n", + " Lake Eber Eber Gölü 104 km2 NaN Afyonkarahisar\n", + " Lake Erçek Erçek Gölü 98 km2 NaN Van\n", + " Lake Hazar Hazar Gölü 86 km2 NaN Elazığ\n", + " Lake Bafa Bafa Gölü 60 km2 NaN Aydın, Muğla\n", + " Lake Köyceğiz Köyceğiz Gölü 52 km2 NaN Köyceğiz in Muğla\n", + " Lake Işıklı Işıklı Gölü 49 km2 NaN Denizli\n", + " Lake Nazik Nazik Gölü 48 km2 NaN Bitlis\n", + " Lake Sapanca Sapanca Gölü 47 km2 NaN 
Sakarya Province\n", + " Lake Salda Salda Gölü 45 km2 184 m Burdur\n", + " Lake Yay Yay Gölü 37 km2 NaN Kayseri\n", + " Lake Akyatan Akyatan Gölü 35 km2 NaN Adana\n", + " Lake Balık Balık Gölü 34 km2 NaN Doğubeyazıt in Ağrı\n", + " Lake Marmara Marmara Gölü 34 km2 NaN Salihli, Gölmarmara in Manisa\n", + " Lake Çöl Çöl Gölü 32 km2 NaN Ankara\n", + "Lake Durusu (Lake Terkos) Durusu Gölü 25 km2 NaN İstanbul\n", + " Lake Karine Karine Gölü 24 km2 NaN NaN\n", + " Lake Tuzla Tuzla Gölü 23 km2 NaN Tuzla\n", + " Lake Küçükçekmece Küçükçekmece Gölü 16 km2 NaN Küçükçekmece, İstanbul\n", + " Lake Yaraşlı Yaraşlı Gölü 16 km2 NaN Burdur\n", + " Lake Haçlı Haçlı Gölü 16 km2 NaN Muş\n", + " Lake Seyfe Seyfe Gölü 15 km2 NaN Kırşehir\n", + " Lake Akyayan Akyayan Gölü 15 km2 NaN NaN\n", + " Lake Hozapin Hozapin Gölü 14 km2 NaN NaN\n", + " Lake Arin Arin Gölü 13 km2 NaN NaN\n", + " Lake Nemrut Nemrut Gölü 12 km2 NaN Bitlis Province\n", + " Lake Balık Balık Gölü 12 km2 NaN NaN\n", + " Lake Büyükçekmece Büyükçekmece Gölü 11 km2 NaN Büyükçekmece, Istanbul\n", + " Lake Boluk Boluk Gölü 11 km2 NaN NaN\n", + " Lake Akdoğan Akdoğan Gölü 11 km2 NaN NaN\n", + " Lake Çavuşlu Çavuşlu Gölü 9 km2 NaN NaN\n", + " Lake Düden Düden Gölü 8 km2 NaN NaN\n", + " Lake Gala Gala Gölü 8 km2 NaN Edirne\n", + " Lake Karataş Karataş Gölü 6 km2 NaN NaN\n", + " Lake Mogan Mogan Gölü 6 km2 NaN Ankara\n", + " Paradeniz Paradeniz 4 km2 NaN Mersin\n", + " Lake Eymir Eymir Gölü 1.8 km2 NaN Ankara\n", + " Lake Abant Abant Gölü 1.28 km2 18 m Bolu\n", + " Lake Gölcük Gölcük Gölü 1 km2 NaN İzmir\n", + "which is deeper, lake tuz or lake palas tuzla?\n", + "```json\n", + "df.query('Hand == \"Full house\" & 2 credits == 4 & theoretical_return > 98.68%')['Hand']\n", + "```\n", + " Hand 1 credit 2 credits 3 credits 4 credits 5 credits\n", + " Royal flush 250 500 750 1000 4000*\n", + " Straight flush 60 120 180 240 400\n", + " Four aces 400 800 1200 1600 2000\n", + "Four of a kind, 2-4 100 200 300 400 500\n", + "Four of a 
kind, 5-K 50 100 150 200 250\n", + " Full house 8 16 24 32 40\n", + " Flush 5 10 15 20 25\n", + " Straight 4 8 12 16 20\n", + " Three of a kind 3 6 9 12 15\n", + " Two pair 1 2 3 4 5\n", + " Jacks or better 1 2 3 4 5\n", + " Theoretical return 98.68% 98.68% 98.68% 98.68% 99.92%*\n", + "after winning on four credits with a full house, what is your payout?\n", + "```pandas\n", + "df[df['Position'] == 'Middle blocker']\n", + "```\n", + " No. Player Birth Date Weight Height Position Current Club\n", + " 4 Ardo Kreek August 7, 1986 (age 27) 96 203 Middle blocker Paris Volley\n", + " 5 Kert Toobal June 3, 1979 (age 35) 78 189 Setter Sivas 4 Eylül\n", + " 6 Martti Juhkami June 6, 1988 (age 26) 96 196 Spiker TV Bühl\n", + " 7 Argo Meresaar January 13, 1980 (age 34) 107 206 Opposite Bigbank Tartu\n", + " 8 Kusti Nõlvak November 6, 1991 (age 22) 81 186 Setter TTÜ VK\n", + " 9 Robert Täht August 15, 1993 (age 20) 80 190 Spiker Bigbank Tartu\n", + " 11 Oliver Venno May 23, 1990 (age 24) 105 210 Opposite Rennes Volley 35\n", + " 14 Rait Rikberg August 30, 1982 (age 31) 80 174 Libero Bigbank Tartu\n", + " 16 Edgar Järvekülg June 12, 1988 (age 26) 77 186 Libero Pärnu VK\n", + " 17 Siim Ennemuist December 5, 1989 (age 24) 89 196 Middle blocker TTÜ VK\n", + " 18 Jaanus Nõmmsalu January 19, 1981 (age 33) 94 200 Spiker TTÜ VK\n", + " 19 Andri Aganits September 7, 1993 (age 20) 99 207 Middle Blocker TV Bühl\n", + "which players played the same position as ardo kreek?\n" + ] + } + ], + "source": [ + "for i in range(n):\n", + " print(code[i])\n", + " print(pd.read_csv('data/' + train.context.iloc[i]).to_string(index=False))\n", + " print(train.utterance.iloc[i])" + ] + }, + { + "cell_type": "code", + "execution_count": 87, + "id": "2bb82209-c50e-4a31-a10a-77be411f4b4d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "df[df['Position'] == '1st']['Venue'].iloc[-1]\n", + "\n", + "\n", + "df['Opponent'][0]\n", + "\n", + "\n", + 
"\n", + "\n" + ] + } + ], + "source": [ + "for i in range(n):\n", + " # Очищаем код для текущей итерации\n", + " current_code = parse_panda_code(code[i])\n", + " \n", + " #print(f\"Итерация {i} | Запускаем выражение: {current_code}\")\n", + " \n", + " try:\n", + " # Читаем нужный датафрейм\n", + " df = pd.read_csv('data/' + train.context.iloc[i])\n", + " target = train.iloc[i].targetValue\n", + " \n", + " # Передаем в eval() код ИМЕННО для текущей итерации\n", + " result = eval(current_code)\n", + " \n", + " # print(\"--- Результат ---\")\n", + " # print(result)\n", + " # print(\"--- Правильный ответ ---\")\n", + " # print(target)\n", + " # print(\"-\" * 40)\n", + " if result == target:\n", + " print(current_code)\n", + " \n", + " except Exception as e:\n", + " print()\n", + " # Выводим реальный текст ошибки, чтобы понять, в чем проблема\n", + " #print(f\"Ошибка на итерации {i}: {e}\")\n", + " #print(\"-\" * 40)" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "id": "34a693df-cd37-4075-869b-1e1b837da533", + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv('data/'+ train.context.iloc[0])" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "id": "4d229f11-3a1f-4b98-a860-b2dc58c623e7", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING: Ignoring invalid distribution ~atplotlib (C:\\Users\\PC\\anaconda3\\Lib\\site-packages)\n" + ] + } + ], + "source": [ + "! 
pip freeze > requirements.txt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c752e932-710b-4e0c-ac77-d0ec621c0e9c", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/WTQ/Qwen/config.py b/WTQ/Qwen/config.py new file mode 100644 index 0000000..f4a43ba --- /dev/null +++ b/WTQ/Qwen/config.py @@ -0,0 +1,57 @@ +system_prompt = '''You are a Python expert specializing in pandas. You are given a question and a table. Your task is to translate the given natural language question into +a single-line pandas expression. This expression, which acts like a query, must +be valid and executable so that running the pandas expression will output the +answer to the question. Consider the following: +1. The table is represented as a pandas DataFrame named df. +2. Do not include explanations, comments, or multiline outputs. +3. Ensure the output is concise, correct, and when run, it outputs the correct +given answer, and strictly follows the Json format: {{"PANDA": ""}} + +### Table schema +{table} + +### Query +{query} + +''' + +logic_prompt = '''You are an expert in Python with a specialization in pandas. Your task is to verify and correct a given pandas code that translates a natural language statement into a pandas expression. The corrected pandas code must accurately evaluate the truth of the statement when applied to the given table. Requirements: + +The table is represented as a pandas DataFrame named df. + +The pandas code must evaluate to a value using the snippet: (eval(pandas_code)). 
The result can be boolean, number, string, date, or any type that matches the expected answer. + +The corrected pandas code must match the value indicated by the provided "Label". + +Ensure the output is concise, correct, and when run outputs the answer, and strictly in the following JSON format with a single key "PANDA": "CORRECT PANDA": "" + +### Table schema +{table} + +### Pandas code +{pandas} + +### Label +{label} +''' + +correct_prompt = '''You are an expert in Python, specializing in pandas. Your task is to correct a pandas code that translates a given natural language statement into a pandas expression. The code, along with the specific error it contains, is provided. Your corrected pandas_code must be valid and executable when running the code snippet eval(pandas_code), ensuring it accurately evaluates the statement using the provided table with no errors. + +The pandas_code can return any type (boolean, number, string, date, etc.) that matches the expected answer. Consider the following: + +The table is represented as a pandas DataFrame named df. + +Do not include explanations, comments, or multiline outputs. 
+ +Ensure the output is concise, correct, and when run outputs the answer, and strictly in the following JSON format with a single key "PANDA": "" + +### Table schema +{table} + +### Pandas code +{pandas} + +### Label +{label} +''' + diff --git a/WTQ/Qwen/server.py b/WTQ/Qwen/server.py new file mode 100644 index 0000000..03aa306 --- /dev/null +++ b/WTQ/Qwen/server.py @@ -0,0 +1,75 @@ +import uvicorn +from fastapi import FastAPI, HTTPException +from pydantic import BaseModel +import torch +from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig + +# Заменили путь на локальный Qwen 7B +model_name = "D:/models/Qwen2.5-Coder-7B-Instruct" +print("Загрузка модели Qwen 7B в 4-бит на GPU...") + +# Инициализируем модель с 4-битным квантованием под RTX 4060 +tokenizer = AutoTokenizer.from_pretrained(model_name) + +quantization_config = BitsAndBytesConfig( + load_in_4bit=True, + bnb_4bit_quant_type="nf4", + bnb_4bit_compute_dtype=torch.bfloat16, + bnb_4bit_use_double_quant=True +) + +model = AutoModelForCausalLM.from_pretrained( + model_name, + quantization_config=quantization_config, + device_map="cuda:0" # Для квантованных моделей заменяет (.to("cuda")) +) + +app = FastAPI() + +class ChatRequest(BaseModel): + model: str + messages: list + temperature: float = 0.5 + max_tokens: int = 500 + +@app.post("/v1/chat/completions") +async def chat_completions(request: ChatRequest): + try: + user_message = request.messages[-1]["content"] + + text = tokenizer.apply_chat_template([{"role": "user", "content": user_message}], tokenize=False, add_generation_prompt=True) + + # Явно отправляем входные токены на видеокарту ("cuda") + model_inputs = tokenizer([text], return_tensors="pt", truncation=True, max_length=2048).to("cuda") + + if request.temperature <= 0.0: + gen_kwargs = {"do_sample": False} + else: + gen_kwargs = {"do_sample": True, "temperature": request.temperature} + + # Отключаем подсчет градиентов для экономии памяти при генерации + with 
torch.no_grad(): + generated_ids = model.generate( + **model_inputs, + max_new_tokens=request.max_tokens, + pad_token_id=tokenizer.eos_token_id, + **gen_kwargs + ) + + generated_ids = [output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)] + response_text = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0] + + return { + "choices": [{ + "message": { + "role": "assistant", + "content": response_text + } + }] + } + except Exception as e: + print(f"\n[ВНУТРЕННЯЯ ОШИБКА СЕРВЕРА]: {e}\n") + raise HTTPException(status_code=500, detail=str(e)) + +if __name__ == "__main__": + uvicorn.run(app, host="0.0.0.0", port=9092) \ No newline at end of file From c0f28c06437a3b196199aa4ad12d6f60e2124898 Mon Sep 17 00:00:00 2001 From: AldarArmaev Date: Fri, 22 May 2026 13:23:48 +0800 Subject: [PATCH 04/10] =?UTF-8?q?=D0=94=D0=BE=D0=B1=D0=B0=D0=B2=D0=BB?= =?UTF-8?q?=D0=B5=D0=BD=D0=B0=20=D0=BD=D0=BE=D1=80=D0=BC=D0=B0=D0=BB=D0=B8?= =?UTF-8?q?=D0=B7=D0=B0=D1=86=D0=B8=D1=8F=20=D0=B4=D0=B0=D0=BD=D0=BD=D1=8B?= =?UTF-8?q?=D1=85?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- WTQ/Qwen/Untitled.ipynb | 6041 +++++++++++++++++++++++++++++++++++---- WTQ/Qwen/normalize.py | 116 + 2 files changed, 5680 insertions(+), 477 deletions(-) create mode 100644 WTQ/Qwen/normalize.py diff --git a/WTQ/Qwen/Untitled.ipynb b/WTQ/Qwen/Untitled.ipynb index a8b094e..5267381 100644 --- a/WTQ/Qwen/Untitled.ipynb +++ b/WTQ/Qwen/Untitled.ipynb @@ -2,29 +2,36 @@ "cells": [ { "cell_type": "code", - "execution_count": 5, + "execution_count": 223, "id": "b824dfad-5524-47e2-b804-6f71e0507f49", "metadata": {}, "outputs": [], "source": [ + "import httpx\n", "import openai\n", "import time\n", - "import re\n", - "import pandas as pd\n", "\n", "def send_message(\n", " message, \n", - " max_tokens=1000, # Увеличил дефолт, так как таблицы и логи могут быть длинными\n", + " max_tokens=1000, \n", " top_p=0.9, \n", - " 
temperature=0.0, # Для задач интерпретатора строго 0.0 по умолчанию\n", + " temperature=0.0, \n", " server_url=\"http://127.0.0.1:9092/v1\", \n", " api_key=\"dummy\",\n", " model_name='Qwen2.5-Coder-7B-Instruct', \n", - " stop=None, # Для обычных ответов стоп-слова лучше сделать опциональными\n", - " retries=3 # Количество попыток при падении сервера\n", + " stop=None, \n", + " retries=3 \n", "):\n", - " # Инициализируем клиент OpenAI\n", - " client = openai.OpenAI(base_url=server_url, api_key=api_key)\n", + " # СОЗДАЕМ КЛИЕНТ БЕЗ ПРОКСИ ДЛЯ РАБОТЫ С LOCALHOST ДАЖЕ ПРИ ВКЛЮЧЕННОМ VPN\n", + " # Передаем пустой словарь в proxies, чтобы httpx игнорировал системные настройки VPN\n", + " http_client = httpx.Client(proxies={})\n", + " \n", + " # Инициализируем клиент OpenAI с кастомным http_client\n", + " client = openai.OpenAI(\n", + " base_url=server_url, \n", + " api_key=api_key,\n", + " http_client=http_client # <-- Передаем его сюда\n", + " )\n", " \n", " model_input = [\n", " { 'role': 'user', 'content': message}\n", @@ -47,7 +54,6 @@ " except Exception as e:\n", " print(f\"\\n[ERROR] Failed to call LLM: {e}\")\n", " \n", - " # Разбираем ответ сервера, если он есть\n", " if hasattr(e, 'response') and e.response is not None:\n", " try:\n", " error_info = e.response.json() \n", @@ -56,7 +62,6 @@ " except Exception:\n", " print(f\"Сырой ответ сервера об ошибке: {e.response.text}\")\n", " \n", - " # Если попытки еще остались — пробуем снова\n", " if retries > 0:\n", " print(f\"Waiting 6 seconds before retry... (Remaining retries: {retries})\")\n", " time.sleep(6)\n", @@ -68,7 +73,6 @@ " else:\n", " print(\"All retries failed. 
Skipping.\")\n", " return False, None\n", - "\n", "def parse_panda_code(input_string):\n", " # Сначала попробуем найти JSON объект с PANDA\n", " json_pattern = r'\\{[^{}]*(?:CORRECT PANDA|PANDA)\":\\s*(.+?)(?:\\n|$)?\\}'\n", @@ -669,7 +673,28 @@ }, { "cell_type": "code", - "execution_count": 71, + "execution_count": 221, + "id": "1bf2008c-e3d7-40be-a56e-429fdef7c670", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'You are an expert in Python, specializing in pandas. Your task is to translate a natural language question into a single-line pandas expression.\\n\\nCRITICAL RULES:\\n1. NEVER use the assignment operator (=). The code must be a standalone expression executable via eval(). Do not modify df, only query it.\\n2. The code must return a single final value (scalar, list of strings, number, or boolean). Never return an entire DataFrame or a Series without extracting the value.\\n3. If you need to filter rows, use dynamic conditions like df[df[\\'col\\'] == \\'val\\']. Do not hardcode specific row index numbers (like loc[3] or at[49]).\\n4. When doing math (subtraction, division) between two different rows, always extract both values to scalars first using .iloc[0] or .values[0] before the math operator.\\n5. 
Keep all brackets [ ] and parentheses ( ) perfectly balanced.\\n\\nEnsure the output is strictly in JSON format with a single key: {\"PANDA\": \"\"}\\n'" + ] + }, + "execution_count": 221, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "config.system_prompt_2" + ] + }, + { + "cell_type": "code", + "execution_count": 227, "id": "97549381-ebfe-4cee-846b-bdd7bcb8feae", "metadata": {}, "outputs": [ @@ -678,574 +703,5636 @@ "output_type": "stream", "text": [ "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", - "[0] Execution error: operation 'rand_' not supported for dtype 'str' with object of type \n", "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", - "[3] Execution error: unsupported operand type(s) for -: 'str' and 'str'\n", + "[3] Execution error: index out of bounds\n", "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", - "[5] Execution error: operation 'sub' not supported for dtype 'str' with dtype 'datetime64[us]'\n", "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "Failed to parse JSON from response: 'int' object has no attribute 'strip'\n", "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", - "[6] Execution error: Expecting ',' delimiter: line 2 column 27 (char 28)\n", "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", "Generating content with model: Qwen2.5-Coder-7B-Instruct 
(Temp: 0.0)\n", "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", - "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", - "[8] Execution error: 'Full house'\n", + "[8] Execution error: 'Theoretical return'\n", "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", "Correct: 2, Logic: 0, Syntax: 0, Failed: 8\n", - "Total processed: 10\n" + "Total processed: 10\n", + "===== HISTORY =====\n", + "Index 0: status=logic_failed, changed=True\n", + " Original: df.loc[df['League'] == 'USL A-League', 'Year'].max()\n", + " Corrected: df.loc[(df['League'] == 'USL A-League') & (df['Division'] == 2), 'Year'].max()\n", + "\n", + "Index 1: status=correct, changed=False\n", + " Original: df.loc[df['Position'] == '1st', 'Venue'].iloc[-1]\n", + "\n", + "Index 2: status=logic_failed, changed=True\n", + " Original: df.loc[df['Team'] == 'Crettyard', 'Years won'].iloc[0] - 1\n", + " Corrected: df.loc[(df['Team'] == 'Crettyard') & (df['Years won'] > 2005), 'Team'].values[0]\n", + "\n", + "Index 3: status=syntax_failed, changed=True\n", + " Original: (df.loc[(df['City'] == 'United States, Los Angeles') & (df['Ranking'] == 1), 'Passengers'].values[0] - df.loc[(df['City'] == 'Canada, Saskatoon') & (df['Ranking'] == 4), 'Passengers'].values[0])\n", + " Corrected: (df.loc[df['City'] == 'United States, Los Angeles', 'Passengers'].values[0] - df.loc[df['City'] == 'Canada, Saskatoon', 'Passengers'].values[0])\n", + "\n", + "Index 4: status=correct, changed=False\n", + " Original: df.iloc[0]['Opponent']\n", + "\n", + "Index 5: status=logic_failed, changed=False\n", + " Original: df[(pd.to_datetime(df['Left office']) - pd.to_datetime(df['Took office'])) >= pd.Timedelta(days=1095)].shape[0]\n", + "\n", + "Index 6: status=logic_failed, changed=False\n", + " Original: df['Away 
team'].iloc[0]\n", + " Corrected: df['Away team'].iloc[0]\n", + "\n", + "Index 7: status=logic_failed, changed=True\n", + " Original: df.loc[df['Name in English'].isin(['Lake Tuz', 'Lake Palas Tuzla']), 'Depth'].max()\n", + " Corrected: df.loc[df['Name in English'].isin(['Lake Tuz', 'Lake Palas Tuzla']), 'Depth'].idxmax()\n", + "\n", + "Index 8: status=syntax_failed, changed=False\n", + " Original: df.loc[(df['Hand'] == 'Full house') & (df['4 credits'] == df['4 credits'].max()), 'Theoretical return'].values[0]\n", + " Corrected: df.loc[(df['Hand'] == 'Full house') & (df['4 credits'] == df['4 credits'].max()), 'Theoretical return'].values[0]\n", + "\n", + "Index 9: status=logic_failed, changed=True\n", + " Original: df[df['Position'] == df.loc[df['Player'] == 'Ardo Kreek', 'Position'].values[0]]\n", + " Corrected: df[(df['Position'] == df.loc[df['Player'] == 'Ardo Kreek', 'Position'].values[0]) & (df['Player'] != 'Ardo Kreek')]['Player'].tolist()\n", + "\n" ] } ], "source": [ - "correct_count = 0\n", - "logic_count = 0\n", - "syntax_count = 0\n", - "failed_count = 0\n", + "import pandas as pd\n", + "import config\n", + "import json\n", + "import re\n", + "import importlib # <-- ИСПРАВЛЕНО: добавили импорт\n", + "\n", + "importlib.reload(config) \n", + "\n", + "CURRENT_SYSTEM_PROMPT = config.system_prompt_2\n", + "CURRENT_SYNTAX_PROMPT = config.syntax_prompt_2\n", + "CURRENT_LOGIC_PROMPT = config.logic_prompt_2\n", "\n", + "correct_count = logic_count = syntax_count = failed_count = 0\n", "n = 10\n", + "code = {'correct': '', 're_correct': '', 're_uncorrect': ''}\n", + "history = []\n", "\n", - "code = {\n", - " 'correct': '',\n", - " 're_correct': '',\n", - " 're_uncorrect': ''\n", - "}\n", + "def exec_pandas(response_text):\n", + " \"\"\"Парсит JSON-ответ от модели, исправляя проблемы экранирования слэшей.\"\"\"\n", + " try:\n", + " json_match = re.search(r'\\{.*\\}', response_text, re.DOTALL)\n", + " if not json_match:\n", + " return None\n", + " \n", + " raw_json 
= json_match.group(0)\n", + " \n", + " try:\n", + " data = json.loads(raw_json)\n", + " except json.JSONDecodeError:\n", + " # ИСПРАВЛЕНО: автоматически чинит неэкранированные слэши в регулярках от Qwen\n", + " fixed_json = re.sub(r'(?= 1095]) df['Away team'].iloc[0] df.loc[df['Name in English'] == 'Lake Palas Tuzla', 'Depth'].values[0] df.loc['Full house', '4 credits'] df[(df['Position'] == df.loc[3, 'Position']) & (df['Player'] != 'Siim Ennemuist')]['Player'].tolist()\"}" - ] - }, - "execution_count": 73, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "code" + "import re\n", + "\n", + "def normalize_string(s):\n", + " \"\"\"Базовая очистка строки: нижний регистр, удаление лишних пробелов.\"\"\"\n", + " if s is None:\n", + " return \"\"\n", + " # Переводим в строку, в нижний регистр, заменяем переносы строк и множественные пробелы\n", + " s = str(s).lower().strip()\n", + " s = re.sub(fr\"\\s+\", \" \", s)\n", + " return s\n", + "\n", + "def extract_numeric(s):\n", + " \"\"\"Оставляет от строки только цифры и точку (для float).\"\"\"\n", + " # Удаляем запятые (разделители тысяч, как в 5,000)\n", + " s = s.replace(\",\", \"\")\n", + " # Ищем все цифры и точку\n", + " match = re.findall(r\"[-+]?\\d*\\.\\d+|\\d+\", s)\n", + " return \"\".join(match) if match else s\n", + "\n", + "def to_token_set(value):\n", + " \"\"\"Разбивает строку или список на множество нормализованных токенов.\"\"\"\n", + " if isinstance(value, (list, tuple, set)):\n", + " items = [normalize_string(x) for x in value]\n", + " else:\n", + " # Разбиваем по пайпу |, запятой с пробелом, или переносу строки\n", + " string_val = normalize_string(value)\n", + " items = re.split(r\"\\||\\n|,\\s\", string_val)\n", + " \n", + " return {item.strip() for item in items if item.strip()}\n", + "\n", + "def smart_compare(py_result, target_value):\n", + " \"\"\"Умное сравнение с гарантированным возвратом финальных обработанных строк.\"\"\"\n", + " if 
isinstance(py_result, bool):\n", + " py_result = \"yes\" if py_result else \"no\"\n", + " \n", + " str_res = normalize_string(py_result)\n", + " str_tar = normalize_string(target_value)\n", + " \n", + " # 1. Прямое совпадение строк\n", + " if str_res == str_tar:\n", + " return True, str_res, str_tar\n", + " \n", + " # 2. Обработка списков\n", + " set_res = to_token_set(py_result)\n", + " set_tar = to_token_set(target_value)\n", + " if set_res == set_tar and len(set_res) > 0:\n", + " return True, \"|\".join(sorted(set_res)), \"|\".join(sorted(set_tar))\n", + " \n", + " # 3. Обработка чисел с единицами измерения\n", + " num_res = extract_numeric(str_res)\n", + " num_tar = extract_numeric(str_tar)\n", + " if num_res == num_tar and num_res != \"\":\n", + " try:\n", + " if float(num_res) == float(num_tar):\n", + " r_out = str(int(float(num_res))) if float(num_res).is_integer() else num_res\n", + " t_out = str(int(float(num_tar))) if float(num_tar).is_integer() else num_tar\n", + " return True, r_out, t_out\n", + " except ValueError:\n", + " pass\n", + "\n", + " # 4. 
Обработка времени\n", + " time_res = re.sub(r\"^0:\", \"\", str_res).strip()\n", + " time_tar = re.sub(r\"^0:\", \"\", str_tar).strip()\n", + " if time_res == time_tar:\n", + " return True, time_res, time_tar\n", + "\n", + " # Если вообще ничего не совпало, но это были списки/пайпы, \n", + " # покажем их в едином отсортированном текстовом формате для наглядности\n", + " if len(set_tar) > 1 or len(set_res) > 1:\n", + " return False, \"|\".join(sorted(set_res)), \"|\".join(sorted(set_tar))\n", + "\n", + " # Для обычных строк возвращаем базовый очищенный вариант\n", + " return False, str_res, str_tar" ] }, { "cell_type": "code", - "execution_count": 59, - "id": "63549786-67fd-4212-91eb-eca42135a8a0", + "execution_count": 291, + "id": "531b6c6a-c249-46a7-b252-bf2f1c4c6587", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "df.loc[df['Division'] == 2 & df['League'] == 'USL A-League', 'Year'].max()\n", - "df.loc[df['Position'] == '1st', 'Venue'].iloc[-1]\n", - "df.loc[df['Team'] == 'Crettyard', 'Years won'].iloc[0] - 1\n", - "(df.loc[df['City'] == 'United States, Los Angeles', 'Passengers'].values[0] - df.loc[df['City'] == 'Canada, Saskatoon', 'Passengers'].values[0])\n", - "df['Opponent'].iloc[0]\n", - "len(df[df['Left office'] - pd.to_datetime(df['Took office']) >= pd.Timedelta(days=1095)])\n", - "df['Away team'].iloc[0]\n", - "df.loc[df['Name in English'].isin(['Lake Tuz', 'Lake Palas Tuzla']), 'Depth'].max()\n", - "df.loc['Full house', '4 credits']\n", - "df[df['Position'] == df.loc[3, 'Position']]['Player'].tolist()\n" + "[0] Код: df.loc[(df['League'] == 'USL A-League') & (df['Division'] == 2), 'Year'].max()\n", + " 1. Результат Pandas: 2004 (Тип: int64)\n", + " 2. Целевое до обработки: 2004\n", + " 3. Целевое после обработки: 2004\n", + " 4. Вердикт smart: ✓ СОВПАЛО\n", + "------------------------------------------------------------\n", + "[1] Код: df.loc[df['Position'] == '1st', 'Venue'].iloc[-1]\n", + " 1. 
Результат Pandas: Bangkok, Thailand (Тип: str)\n", + " 2. Целевое до обработки: Bangkok, Thailand\n", + " 3. Целевое после обработки: bangkok, thailand\n", + " 4. Вердикт smart: ✓ СОВПАЛО\n", + "------------------------------------------------------------\n", + "[2] Код: df.loc[(df['Team'] == 'Crettyard') & (df['Years won'] > 2005), 'Team'].values[0]\n", + " 1. Результат Pandas: Ошибка выполнения: index out of bounds (Тип: str)\n", + " 2. Целевое до обработки: Wolfe Tones\n", + " 3. Целевое после обработки: Wolfe Tones\n", + " 4. Вердикт smart: ✗ НЕ СОВПАЛО\n", + "------------------------------------------------------------\n", + "[3] Код: (df.loc[df['City'] == 'United States, Los Angeles', 'Passengers'].values[0] - df.loc[df['City'] == 'Canada, Saskatoon', 'Passengers'].values[0])\n", + " 1. Результат Pandas: Ошибка выполнения: unsupported operand type(s) for -: 'str' and 'str' (Тип: str)\n", + " 2. Целевое до обработки: 12,467\n", + " 3. Целевое после обработки: 12,467\n", + " 4. Вердикт smart: ✗ НЕ СОВПАЛО\n", + "------------------------------------------------------------\n", + "[4] Код: df.iloc[0]['Opponent']\n", + " 1. Результат Pandas: Derby County (Тип: str)\n", + " 2. Целевое до обработки: Derby County\n", + " 3. Целевое после обработки: derby county\n", + " 4. Вердикт smart: ✓ СОВПАЛО\n", + "------------------------------------------------------------\n", + "[5] Код: df[(pd.to_datetime(df['Left office']) - pd.to_datetime(df['Took office'])) >= pd.Timedelta(days=1095)].shape[0]\n", + " 1. Результат Pandas: 5 (Тип: int)\n", + " 2. Целевое до обработки: 4\n", + " 3. Целевое после обработки: 4\n", + " 4. Вердикт smart: ✗ НЕ СОВПАЛО\n", + "------------------------------------------------------------\n", + "[6] Код: df['Away team'].iloc[0]\n", + " 1. Результат Pandas: Varbergs GIF (D3) (Тип: str)\n", + " 2. Целевое до обработки: Varbergs GIF\n", + " 3. Целевое после обработки: varbergs gif\n", + " 4. 
Вердикт smart: ✗ НЕ СОВПАЛО\n", + "------------------------------------------------------------\n", + "[7] Код: df.loc[df['Name in English'].isin(['Lake Tuz', 'Lake Palas Tuzla']), 'Depth'].idxmax()\n", + " 1. Результат Pandas: 1 (Тип: int)\n", + " 2. Целевое до обработки: Lake Palas Tuzla\n", + " 3. Целевое после обработки: lake palas tuzla\n", + " 4. Вердикт smart: ✗ НЕ СОВПАЛО\n", + "------------------------------------------------------------\n", + "[8] Код: df.loc[(df['Hand'] == 'Full house') & (df['4 credits'] == df['4 credits'].max()), 'Theoretical return'].values[0]\n", + " 1. Результат Pandas: Ошибка выполнения: 'Theoretical return' (Тип: str)\n", + " 2. Целевое до обработки: 32\n", + " 3. Целевое после обработки: 32\n", + " 4. Вердикт smart: ✗ НЕ СОВПАЛО\n", + "------------------------------------------------------------\n", + "[9] Код: df[(df['Position'] == df.loc[df['Player'] == 'Ardo Kreek', 'Position'].values[0]) & (df['Player'] != 'Ardo Kreek')]['Player'].tolist()\n", + " 1. Результат Pandas: ['Siim Ennemuist'] (Тип: list)\n", + " 2. Целевое до обработки: Siim Ennemuist|Andri Aganits\n", + " 3. Целевое после обработки: siim ennemuist|andri aganits\n", + " 4. 
Вердикт smart: ✗ НЕ СОВПАЛО\n", + "------------------------------------------------------------\n" ] } ], "source": [ - "for i in range(len(code)):\n", - " current code = exec_pandas(code[i])\n", - " df = pd.read_csv('data/' + train.context.iloc[i])\n", - " target = train.targetValue.iloc[i]" + "import pandas as pd\n", + "\n", + "def check_code(p_code, table_path, label):\n", + " \"\"\"Выполняет код и проверяет соответствие лейблу через smart_compare.\"\"\"\n", + " try:\n", + " local_vars = {'pd': pd, 'df': pd.read_csv(table_path)}\n", + " result = eval(p_code, {}, local_vars)\n", + " \n", + " # Берем только первый элемент (bool вердикт) из функции умного сравнения\n", + " comparison_res = smart_compare(result, label)\n", + " return comparison_res[0] if isinstance(comparison_res, tuple) else comparison_res\n", + " except:\n", + " return False\n", + "\n", + "# Имитация работы smart_compare для стабильности (замените своей, если она сложнее)\n", + "def smart_compare(res, target):\n", + " # Базовая нормализация для примера\n", + " res_str = str(res).strip().lower()\n", + " tar_str = str(target).strip().lower()\n", + " \n", + " # Умное приведение: например, \"3238.0\" должно быть равно \"3238\"\n", + " try:\n", + " if float(res_str) == float(tar_str):\n", + " return True, res_str, tar_str\n", + " except ValueError:\n", + " pass\n", + " \n", + " is_valid = res_str == tar_str\n", + " return is_valid, res_str, tar_str\n", + "\n", + "# Основной цикл проверки истории\n", + "for item in history:\n", + " idx = item['index']\n", + " \n", + " # Выбираем скорректированный код, если он есть\n", + " p_code = item['corrected'] if item['corrected'] else item['original']\n", + " \n", + " # ВАЖНО: Убедитесь, что здесь указан путь именно к ОЧИЩЕННЫМ таблицам\n", + " table_path = f\"data/{train.context.iloc[idx]}\"\n", + " label = train.targetValue.iloc[idx]\n", + " \n", + " try:\n", + " # Читаем очищенный датафрейм\n", + " df_cleaned = pd.read_csv(table_path)\n", + " local_vars = 
{'pd': pd, 'df': df_cleaned}\n", + " \n", + " # Выполняем код\n", + " pandas_result = eval(p_code, {}, local_vars)\n", + " \n", + " # Получаем вердикт и обработанные строки\n", + " is_valid, res_processed, tar_processed = smart_compare(pandas_result, label)\n", + " \n", + " except Exception as e:\n", + " pandas_result = f\"Ошибка выполнения: {e}\"\n", + " tar_processed = str(label).strip() # Базовая очистка при падении\n", + " is_valid = False\n", + "\n", + " # Вывод результатов в консоль\n", + " print(f\"[{idx}] Код: {p_code}\")\n", + " print(f\" 1. Результат Pandas: {pandas_result} (Тип: {type(pandas_result).__name__})\")\n", + " print(f\" 2. Целевое до обработки: {label}\")\n", + " print(f\" 3. Целевое после обработки: {tar_processed}\")\n", + " print(f\" 4. Вердикт smart: {'✓ СОВПАЛО' if is_valid else '✗ НЕ СОВПАЛО'}\")\n", + " print(\"-\" * 60)" ] }, { "cell_type": "code", - "execution_count": 55, - "id": "1219cf2d-55b4-4cf8-9ee0-93702e2d1ce9", + "execution_count": 215, + "id": "92dac3a0-1cb6-46ff-9a48-61e73f087e3d", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "{0: '```json\\n{\\n \"PANDA\": \"df.loc[df[\\'Division\\'] == 2 & df[\\'League\\'] == \\'USL A-League\\', \\'Year\\'].max()\"\\n}\\n```',\n", - " 1: '```json\\n{\\n \"PANDA\": \"df.loc[df[\\'Position\\'] == \\'1st\\', \\'Venue\\'].iloc[-1]\"\\n}\\n```',\n", - " 2: '```json\\n{\\n \"PANDA\": \"df.loc[df[\\'Team\\'] == \\'Crettyard\\', \\'Years won\\'].iloc[0] - 1\"\\n}\\n```',\n", - " 3: '```json\\n{\\n \"PANDA\": \"(df.loc[df[\\'City\\'] == \\'United States, Los Angeles\\', \\'Passengers\\'].values[0] - df.loc[df[\\'City\\'] == \\'Canada, Saskatoon\\', \\'Passengers\\'].values[0])\"\\n}\\n```',\n", - " 4: '```json\\n{\\n \"PANDA\": \"df[\\'Opponent\\'].iloc[0]\"\\n}\\n```',\n", - " 5: '```json\\n{\\n \"PANDA\": \"len(df[df[\\'Left office\\'] - pd.to_datetime(df[\\'Took office\\']) >= pd.Timedelta(days=1095)])\"\\n}\\n```',\n", - " 6: '```json\\n{\\n \"PANDA\": \"df[\\'Away 
team\\'].iloc[0]\"\\n}\\n```',\n", - " 7: '```json\\n{\\n \"PANDA\": \"df.loc[df[\\'Name in English\\'].isin([\\'Lake Tuz\\', \\'Lake Palas Tuzla\\']), \\'Depth\\'].max()\"\\n}\\n```',\n", - " 8: '```json\\n{\\n \"PANDA\": \"df.loc[\\'Full house\\', \\'4 credits\\']\"\\n}\\n```',\n", - " 9: '```json\\n{\\n \"PANDA\": \"df[df[\\'Position\\'] == df.loc[3, \\'Position\\']][\\'Player\\'].tolist()\"\\n}\\n```'}" + "[{'index': 0,\n", + " 'original': \"df.loc[df['A'] == 'x', 'B'].values[0] - df.loc[df['A'] == 'y', 'B'].values[0]\",\n", + " 'corrected': \"df['column_name'] = df['column_name'].apply(lambda x: int(x > threshold))\",\n", + " 'status': 'syntax_failed',\n", + " 'changed': True},\n", + " {'index': 1,\n", + " 'original': \"df.loc[df['A'] == 'x', 'B'].values[0] - df.loc[df['A'] == 'y', 'B'].values[0]\",\n", + " 'corrected': \"df['column_name'] = df['column_name'].apply(lambda x: int(x > threshold))\",\n", + " 'status': 'syntax_failed',\n", + " 'changed': True},\n", + " {'index': 2,\n", + " 'original': \"df.loc[df['A'] == 'x', 'B'].values[0] - df.loc[df['A'] == 'y', 'B'].values[0]\",\n", + " 'corrected': \"df['column_name'] = df['column_name'].apply(lambda x: int(x > threshold))\",\n", + " 'status': 'syntax_failed',\n", + " 'changed': True},\n", + " {'index': 3,\n", + " 'original': \"df.loc[df['A'] == 'x', 'B'].values[0] - df.loc[df['A'] == 'y', 'B'].values[0]\",\n", + " 'corrected': \"df['column_name'] = df['column_name'].apply(lambda x: int(x > threshold))\",\n", + " 'status': 'syntax_failed',\n", + " 'changed': True},\n", + " {'index': 4,\n", + " 'original': \"df.loc[df['A'] == 'x', 'B'].values[0] - df.loc[df['A'] == 'y', 'B'].values[0]\",\n", + " 'corrected': \"df['column_name'] = df['column_name'].apply(lambda x: int(x > threshold))\",\n", + " 'status': 'syntax_failed',\n", + " 'changed': True},\n", + " {'index': 5,\n", + " 'original': \"df.loc[df['A'] == 'x', 'B'].values[0] - df.loc[df['A'] == 'y', 'B'].values[0]\",\n", + " 'corrected': \"df['column_name'] = 
df['column_name'].apply(lambda x: int(x > threshold))\",\n", + " 'status': 'syntax_failed',\n", + " 'changed': True},\n", + " {'index': 6,\n", + " 'original': \"df.loc[df['A'] == 'x', 'B'].values[0] - df.loc[df['A'] == 'y', 'B'].values[0]\",\n", + " 'corrected': \"df['column_name'] = df['column_name'].apply(lambda x: int(x > threshold))\",\n", + " 'status': 'syntax_failed',\n", + " 'changed': True},\n", + " {'index': 7,\n", + " 'original': \"df.loc[df['A'] == 'x', 'B'].values[0] - df.loc[df['A'] == 'y', 'B'].values[0]\",\n", + " 'corrected': \"df['column_name'] = df['column_name'].apply(lambda x: int(x > threshold))\",\n", + " 'status': 'syntax_failed',\n", + " 'changed': True},\n", + " {'index': 8,\n", + " 'original': \"df.loc[df['A'] == 'x', 'B'].values[0] - df.loc[df['A'] == 'y', 'B'].values[0]\",\n", + " 'corrected': \"df['column_name'] = df['column_name'].apply(lambda x: int(x > threshold))\",\n", + " 'status': 'syntax_failed',\n", + " 'changed': True},\n", + " {'index': 9,\n", + " 'original': \"df.loc[df['A'] == 'x', 'B'].values[0] - df.loc[df['A'] == 'y', 'B'].values[0]\",\n", + " 'corrected': \"df['column_name'] = df['column_name'].apply(lambda x: int(x > threshold))\",\n", + " 'status': 'syntax_failed',\n", + " 'changed': True}]" ] }, - "execution_count": 55, + "execution_count": 215, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "code" - ] - }, - { - "cell_type": "code", - "execution_count": 156, - "id": "8c446433-f113-4c4f-a791-adac6e94a19a", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "```json\n", - "{\"PANDA\": \"df[df['League'] == 'USL A-League'].index[-1].year\"}\n", - "```\n", - " Year Division League Regular Season Playoffs Open Cup Avg. 
Attendance\n", - " 2001 2 USL A-League 4th, Western Quarterfinals Did not qualify 7,169\n", - " 2002 2 USL A-League 2nd, Pacific 1st Round Did not qualify 6,260\n", - " 2003 2 USL A-League 3rd, Pacific Did not qualify Did not qualify 5,871\n", - " 2004 2 USL A-League 1st, Western Quarterfinals 4th Round 5,628\n", - " 2005 2 USL First Division 5th Quarterfinals 4th Round 6,028\n", - " 2006 2 USL First Division 11th Did not qualify 3rd Round 5,575\n", - " 2007 2 USL First Division 2nd Semifinals 2nd Round 6,851\n", - " 2008 2 USL First Division 11th Did not qualify 1st Round 8,567\n", - " 2009 2 USL First Division 1st Semifinals 3rd Round 9,734\n", - " 2010 2 USSF D-2 Pro League 3rd, USL (3rd) Quarterfinals 3rd Round 10,727\n", - "what was the last year where this team was a part of the usl a-league?\n", - "```json\n", - "df[df['Position'] == '1st']['Venue'].iloc[-1]\n", - "```\n", - " Year Competition Venue Position Event Notes\n", - " 2001 World Youth Championships Debrecen, Hungary 2nd 400 m 47.12\n", - " 2001 World Youth Championships Debrecen, Hungary 1st Medley relay 1:50.46\n", - " 2001 European Junior Championships Grosseto, Italy 1st 4x400 m relay 3:06.12\n", - " 2003 European Junior Championships Tampere, Finland 3rd 400 m 46.69\n", - " 2003 European Junior Championships Tampere, Finland 2nd 4x400 m relay 3:08.62\n", - " 2005 European U23 Championships Erfurt, Germany 11th (sf) 400 m 46.62\n", - " 2005 European U23 Championships Erfurt, Germany 1st 4x400 m relay 3:04.41\n", - " 2005 Universiade Izmir, Turkey 7th 400 m 46.89\n", - " 2005 Universiade Izmir, Turkey 1st 4x400 m relay 3:02.57\n", - " 2006 World Indoor Championships Moscow, Russia 2nd (h) 4x400 m relay 3:06.10\n", - " 2006 European Championships Gothenburg, Sweden 3rd 4x400 m relay 3:01.73\n", - " 2007 European Indoor Championships Birmingham, United Kingdom 3rd 4x400 m relay 3:08.14\n", - " 2007 Universiade Bangkok, Thailand 7th 400 m 46.85\n", - " 2007 Universiade Bangkok, Thailand 1st 4x400 m 
relay 3:02.05\n", - " 2008 World Indoor Championships Valencia, Spain 4th 4x400 m relay 3:08.76\n", - " 2008 Olympic Games Beijing, China 7th 4x400 m relay 3:00.32\n", - " 2009 Universiade Belgrade, Serbia 2nd 4x400 m relay 3:05.69\n", - "in what city did piotr's last 1st place finish occur?\n", - "```json\n", - "df[df['County'] == 'Laois']['Team'].iloc[0]\n", - "```\n", - " Team County Wins Years won\n", - " Greystones Wicklow 1 2011\n", - "Ballymore Eustace Kildare 1 2010\n", - " Maynooth Kildare 1 2009\n", - " Ballyroan Abbey Laois 1 2008\n", - " Fingal Ravens Dublin 1 2007\n", - " Confey Kildare 1 2006\n", - " Crettyard Laois 1 2005\n", - " Wolfe Tones Meath 1 2004\n", - " Dundalk Gaels Louth 1 2003\n", - "which team won previous to crettyard?\n", - "```json\n", - "df[(df['City'] == 'Los Angeles') & (df['Ranking'] > 3)]['Passengers'].sum() - df[df['City'] == 'Saskatoon']['Passengers'].sum()\n", - "```\n", - " Rank City Passengers Ranking Airline\n", - " 1 United States, Los Angeles 14,749 NaN Alaska Airlines\n", - " 2 United States, Houston 5,465 NaN United Express\n", - " 3 Canada, Calgary 3,761 NaN Air Transat, WestJet\n", - " 4 Canada, Saskatoon 2,282 4.0 NaN\n", - " 5 Canada, Vancouver 2,103 NaN Air Transat\n", - " 6 United States, Phoenix 1,829 1.0 US Airways\n", - " 7 Canada, Toronto 1,202 1.0 Air Transat, CanJet\n", - " 8 Canada, Edmonton 110 NaN NaN\n", - " 9 United States, Oakland 107 NaN NaN\n", - "how many more passengers flew to los angeles than to saskatoon from manzanillo airport in 2013?\n", - "```pandas\n", - "df[df['Date'] == '15 August 1987']['Opponent']\n", - "```\n", - " Date Opponent Venue Result Attendance Scorers\n", - " 15 August 1987 Derby County Away 0–1 17,204 —\n", - " 18 August 1987 Coventry City Home 0–1 09,380 —\n", - " 22 August 1987 West Ham United Home 2–2 08,073 Harford (2)\n", - " 29 August 1987 Chelsea Away 0–3 16,075 —\n", - " 31 August 1987 Arsenal Home 1–1 08,745 Wilson (pen)\n", - " 5 September 1987 Oxford United Away 
5–2 06,804 Breacker, Harford, Hill, Nwajiobi, B. Stein\n", - "12 September 1987 Everton Home 2–1 08,124 Hill, B. Stein\n", - "19 September 1987 Charlton Athletic Away 0–1 05,002 —\n", - "26 September 1987 Queens Park Rangers Away 0–2 11,175 —\n", - " 3 October 1987 Manchester United Home 1–1 09,137 Harford\n", - " 10 October 1987 Portsmouth Away 1–3 12,391 Harford (pen)\n", - " 17 October 1987 Wimbledon Home 2–0 07,018 B. Stein, Wilson\n", - " 24 October 1987 Liverpool Home 0–1 11,997 —\n", - " 7 November 1987 Newcastle United Home 4–0 07,638 Nwajiobi, B. Stein, M. Stein (2)\n", - " 14 November 1987 Sheffield Wednesday Away 2–0 16,960 Allinson, M. Stein\n", - " 21 November 1987 Tottenham Hotspur Home 2–0 10,091 Allinson (2)\n", - " 5 December 1987 Norwich City Home 1–2 07,002 B. Stein\n", - " 12 December 1987 Watford Away 1–0 12,152 Foster\n", - " 18 December 1987 Southampton Home 2–2 06,618 Harford, McDonough\n", - " 26 December 1987 Everton Away 0–2 32,128 —\n", - " 28 December 1987 Charlton Athletic Home 1–0 07,243 Wilson\n", - " 1 January 1988 Chelsea Home 3–0 08,018 Harford, B. Stein, M. Stein\n", - " 2 January 1988 West Ham United Away 1–1 16,716 M. Stein\n", - " 16 January 1988 Derby County Home 1–0 07,175 McDonough\n", - " 6 February 1988 Oxford United Home 7–4 08,063 Harford (2), McDonough, B.Stein, M.Stein (3)\n", - " 13 February 1988 Arsenal Away 1–2 22,612 M.Stein\n", - " 5 March 1988 Wimbledon Away 0–2 04,854 —\n", - " 15 March 1988 Coventry City Away 0–4 13,711 —\n", - " 29 March 1988 Portsmouth Home 4–1 06,740 B.Stein, M.Stein, Wilson, own goal\n", - " 2 April 1988 Newcastle United Away 0–4 20,752 —\n", - " 5 April 1988 Sheffield Wednesday Home 2–2 07,337 McDonough, B. Stein\n", - " 12 April 1988 Manchester United Away 0–3 28,830 —\n", - " 19 April 1988 Queens Park Rangers Home 2–1 06,735 Foster, Wilson (pen)\n", - " 30 April 1988 Norwich City Away 2–2 13,171 M. 
Stein, Wilson (pen)\n", - " 2 May 1988 Watford Home 2–1 10,409 Oldfield, Wilson (pen)\n", - " 4 May 1988 Tottenham Hotspur Away 1–2 15,437 Grimes\n", - " 7 May 1988 Southampton Away 1–1 12,722 Wilson\n", - " 9 May 1988 Liverpool Away 1–1 30,374 Oldfield\n", - " 13 May 1988 Nottingham Forest Home 1–1 09,108 Donaghy\n", - " 15 May 1988 Nottingham Forest Away 1–1 13,106 Oldfield\n", - "who was the opponent in the first game of the season?\n", - "```json\n", - "{\"PANDA\": \"len(df[df['Took office'] <= df['Left office'].shift()])\"}\n", - "```\n", - " Unnamed: 0 Name Took office Left office Party Notes/Events\n", - " 11 William McCreery March 4, 1803 March 3, 1809 Democratic Republican NaN\n", - " 12 Alexander McKim March 4, 1809 March 3, 1815 Democratic Republican NaN\n", - " 13 William Pinkney March 4, 1815 April 18, 1816 Democratic Republican Resigned to accept position as Minister Plenipotentiary to Russia\n", - " 14 Peter Little September 2, 1816 March 3, 1823 Democratic Republican NaN\n", - " 14 Peter Little March 4, 1823 March 3, 1825 Jacksonian DR NaN\n", - " 14 Peter Little March 4, 1825 March 3, 1829 Adams NaN\n", - " 15 Benjamin C. 
Howard March 4, 1829 March 3, 1833 Jacksonian NaN\n", - "how many people stayed at least 3 years in office?\n", - "```python\n", - "df['Away team'].iloc[0]\n", - "```\n", - " Tie no Home team Score Away team\n", - " 49 Dalsjöfors GoIF (WC) 1-4 Varbergs GIF (D3)\n", - " 50 Sjömarkens IF (D4) 1-4 BK Slätta Damm (D3)\n", - " 51 IF Tymer (D4) 0-3 Kållereds SK (D3)\n", - " 52 IFK Hjo (WC) 0-4 Nässjö FF (D3)\n", - " 53 Falköpings FK (D4) 2-0 Gånghesters SK (D4)\n", - " 54 Ankarsrums IS (WC) 1-2 Linköpings FF (D3)\n", - " 55 Rödsle BK (D4) 1-0 (gg) Skeninge IK (D4)\n", - " 56 Lindås BK (D4) 1-3 Hultsfreds FK (D3)\n", - " 57 Hvetlanda GIF (D4) 0-1 Åhus Horna BK (D3)\n", - " 58 Bredaryds IK (D4) 3-0 Ulricehamns IFK (D3)\n", - " 59 Hovslätts IK (D4) 0-9 Tidaholms GIF (D2)\n", - " 60 Torpa AIS (D4) 0-2 BK Zeros (D3)\n", - " 61 Fiskeby IF (WC) 2-1 (gg) Västerviks FF (D4)\n", - " 62 Gnösjö IF (D4) 1-3 Skövde AIK (D2)\n", - " 63 Sävsjö FF (D4) 5-3 Skillingaryds IS (D4)\n", - " 64 Boxholms IF (WC) 1-2 Tranås AIF (D3)\n", - " 65 LSW IF (D4) 2-1 Husqvarna FF (D2)\n", - " 66 Lessebo GoIF (D4) 0-1 Listerby IK (D4)\n", - " 67 Rörviks IF (D4) 0-2 Lunds BK (D2)\n", - " 68 Lagans AIK (D4) 0-1 Högaborgs BK (D2)\n", - " 69 IF Eksjö (D4) 1-4 Kalmar FF (D2)\n", - " 70 Limmareds IF (D4) 1-5 Växjö Norra IF (D2)\n", - " 71 Bankeryds SK (D4) 4-1 Hjulsbro IK (D2)\n", - " 72 Skultorps IF (D4) 0-2 BK Forward (D2)\n", - " 73 Gullspångs IF (D4) 0-7 Rynninge IK (D3)\n", - " 74 Skara FC (D4) 0-4 Karlslunds IF (D3)\n", - " 75 Bråtens IK (D4) 0-4 Vivalla-Lundby IF (D3)\n", - " 76 Finnerödja IF(D4) 3-1 IFK Mariestad (D4)\n", - " 77 Sköllersta IF (D4) 1-3 Hemgårdarnas BK (D4)\n", - " 78 Simonstorps IF (D4) 0-5 Nyköpings BIS (D2)\n", - " 79 Ringarums IF (D4) 1-4 Värmbols FC (D4)\n", - " 80 Dagsbergs IF (D4) 1-0 Malmköpings IF (D4)\n", - " 81 Katrineholms SK (D4) 0-2 BK Kenty (D4)\n", - " 82 Härad IF (D4) 2-3 (gg) IFK Västerås (D2)\n", - " 83 Kolsva IF (D4) 0-3 Karlstad BK (D2)\n", - " 84 Laxå IF (D4) 0-4 IF 
Sylvia (D2)\n", - " 85 Ransta IK (D4) 1-3 IFK Hallsberg (D4)\n", - " 86 Skyllbergs IK (WC) 0-4 IFK Kristinehamn (D4)\n", - " 87 Filipstads FF (D4) 3-1 Kungsörs SK (D4)\n", - " 88 Hallstahammars SK (D4) 0-7 IFK Eskilstuna (D2)\n", - " 89 BK Hird (D4) 0-5 Hargs BK (D2)\n", - " 90 Vretstorps IF (WC) 1-5 IFK Ölme (D3)\n", - " 91 Frövi IK (WC) 1-9 Skiljebo SK (D3)\n", - " 92 IF Rune (WC) 1-3 Gnesta FF (D3)\n", - " 93 Västerås BK 30 (WC) 0-8 Örebro SK Ungdom (D4)\n", - " 94 VoIF Diana (WC) 4-0 Enskede IK (D4)\n", - " 95 New Mill FF (WC) 4-2 Värtans SK (D3)\n", - " 96 Runtuna IK/Löthen (WC) 1-2 Huddinge IF (D3)\n", - "who is the first away team on the chart\n", - "```pandas\n", - "df[df['Name in Turkish'].str.contains('Tuz')]['Depth']\n", - "```\n", - " Name in English Name in Turkish Area (km2) Depth Location (districts and/or provinces)\n", - " Lake Van Van Gölü 3755 km2 451 m Van, Bitlis\n", - " Lake Tuz Tuz Gölü 1500 km2 2 m Aksaray, Ankara, Konya\n", - " Lake Beyşehir Beyşehir Gölü 656 km2 10 m Beyşehir in Konya, Isparta\n", - " Lake Eğirdir Eğirdir Gölü 482 km2 NaN Isparta\n", - " Lake İznik İznik Gölü 308 km2 NaN İznik in Bursa, Yalova\n", - " Lake Burdur Burdur Gölü 200 km2 NaN Burdur, Isparta\n", - " Lake Manyas Manyas Gölü 166 km2 NaN Balıkesir\n", - " Lake Acıgöl Acıgöl 153 km2 NaN Denizli, Afyonkarahisar\n", - " Lake Uluabat Uluabat Gölü 134 km2 1–2 m Bursa\n", - " Lake Çıldır Çıldır Gölü 115 km2 NaN Ardahan, Kars\n", - " Lake Palas Tuzla Palas Tuzla Gölü 106 km2 15 m Palas/Kayseri\n", - " Lake Akşehir Akşehir Gölü 105 km2 NaN Akşehir in Konya, Afyonkarahisar\n", - " Lake Eber Eber Gölü 104 km2 NaN Afyonkarahisar\n", - " Lake Erçek Erçek Gölü 98 km2 NaN Van\n", - " Lake Hazar Hazar Gölü 86 km2 NaN Elazığ\n", - " Lake Bafa Bafa Gölü 60 km2 NaN Aydın, Muğla\n", - " Lake Köyceğiz Köyceğiz Gölü 52 km2 NaN Köyceğiz in Muğla\n", - " Lake Işıklı Işıklı Gölü 49 km2 NaN Denizli\n", - " Lake Nazik Nazik Gölü 48 km2 NaN Bitlis\n", - " Lake Sapanca Sapanca Gölü 47 km2 NaN 
Sakarya Province\n", - " Lake Salda Salda Gölü 45 km2 184 m Burdur\n", - " Lake Yay Yay Gölü 37 km2 NaN Kayseri\n", - " Lake Akyatan Akyatan Gölü 35 km2 NaN Adana\n", - " Lake Balık Balık Gölü 34 km2 NaN Doğubeyazıt in Ağrı\n", - " Lake Marmara Marmara Gölü 34 km2 NaN Salihli, Gölmarmara in Manisa\n", - " Lake Çöl Çöl Gölü 32 km2 NaN Ankara\n", - "Lake Durusu (Lake Terkos) Durusu Gölü 25 km2 NaN İstanbul\n", - " Lake Karine Karine Gölü 24 km2 NaN NaN\n", - " Lake Tuzla Tuzla Gölü 23 km2 NaN Tuzla\n", - " Lake Küçükçekmece Küçükçekmece Gölü 16 km2 NaN Küçükçekmece, İstanbul\n", - " Lake Yaraşlı Yaraşlı Gölü 16 km2 NaN Burdur\n", - " Lake Haçlı Haçlı Gölü 16 km2 NaN Muş\n", - " Lake Seyfe Seyfe Gölü 15 km2 NaN Kırşehir\n", - " Lake Akyayan Akyayan Gölü 15 km2 NaN NaN\n", - " Lake Hozapin Hozapin Gölü 14 km2 NaN NaN\n", - " Lake Arin Arin Gölü 13 km2 NaN NaN\n", - " Lake Nemrut Nemrut Gölü 12 km2 NaN Bitlis Province\n", - " Lake Balık Balık Gölü 12 km2 NaN NaN\n", - " Lake Büyükçekmece Büyükçekmece Gölü 11 km2 NaN Büyükçekmece, Istanbul\n", - " Lake Boluk Boluk Gölü 11 km2 NaN NaN\n", - " Lake Akdoğan Akdoğan Gölü 11 km2 NaN NaN\n", - " Lake Çavuşlu Çavuşlu Gölü 9 km2 NaN NaN\n", - " Lake Düden Düden Gölü 8 km2 NaN NaN\n", - " Lake Gala Gala Gölü 8 km2 NaN Edirne\n", - " Lake Karataş Karataş Gölü 6 km2 NaN NaN\n", - " Lake Mogan Mogan Gölü 6 km2 NaN Ankara\n", - " Paradeniz Paradeniz 4 km2 NaN Mersin\n", - " Lake Eymir Eymir Gölü 1.8 km2 NaN Ankara\n", - " Lake Abant Abant Gölü 1.28 km2 18 m Bolu\n", - " Lake Gölcük Gölcük Gölü 1 km2 NaN İzmir\n", - "which is deeper, lake tuz or lake palas tuzla?\n", - "```json\n", - "df.query('Hand == \"Full house\" & 2 credits == 4 & theoretical_return > 98.68%')['Hand']\n", - "```\n", - " Hand 1 credit 2 credits 3 credits 4 credits 5 credits\n", - " Royal flush 250 500 750 1000 4000*\n", - " Straight flush 60 120 180 240 400\n", - " Four aces 400 800 1200 1600 2000\n", - "Four of a kind, 2-4 100 200 300 400 500\n", - "Four of a 
kind, 5-K 50 100 150 200 250\n", - " Full house 8 16 24 32 40\n", - " Flush 5 10 15 20 25\n", - " Straight 4 8 12 16 20\n", - " Three of a kind 3 6 9 12 15\n", - " Two pair 1 2 3 4 5\n", - " Jacks or better 1 2 3 4 5\n", - " Theoretical return 98.68% 98.68% 98.68% 98.68% 99.92%*\n", - "after winning on four credits with a full house, what is your payout?\n", - "```pandas\n", - "df[df['Position'] == 'Middle blocker']\n", - "```\n", - " No. Player Birth Date Weight Height Position Current Club\n", - " 4 Ardo Kreek August 7, 1986 (age 27) 96 203 Middle blocker Paris Volley\n", - " 5 Kert Toobal June 3, 1979 (age 35) 78 189 Setter Sivas 4 Eylül\n", - " 6 Martti Juhkami June 6, 1988 (age 26) 96 196 Spiker TV Bühl\n", - " 7 Argo Meresaar January 13, 1980 (age 34) 107 206 Opposite Bigbank Tartu\n", - " 8 Kusti Nõlvak November 6, 1991 (age 22) 81 186 Setter TTÜ VK\n", - " 9 Robert Täht August 15, 1993 (age 20) 80 190 Spiker Bigbank Tartu\n", - " 11 Oliver Venno May 23, 1990 (age 24) 105 210 Opposite Rennes Volley 35\n", - " 14 Rait Rikberg August 30, 1982 (age 31) 80 174 Libero Bigbank Tartu\n", - " 16 Edgar Järvekülg June 12, 1988 (age 26) 77 186 Libero Pärnu VK\n", - " 17 Siim Ennemuist December 5, 1989 (age 24) 89 196 Middle blocker TTÜ VK\n", - " 18 Jaanus Nõmmsalu January 19, 1981 (age 33) 94 200 Spiker TTÜ VK\n", - " 19 Andri Aganits September 7, 1993 (age 20) 99 207 Middle Blocker TV Bühl\n", - "which players played the same position as ardo kreek?\n" - ] - } - ], - "source": [ - "for i in range(n):\n", - " print(code[i])\n", - " print(pd.read_csv('data/' + train.context.iloc[i]).to_string(index=False))\n", - " print(train.utterance.iloc[i])" + "history" ] }, { "cell_type": "code", - "execution_count": 87, - "id": "2bb82209-c50e-4a31-a10a-77be411f4b4d", + "execution_count": 289, + "id": "808bdf6b-df4c-45b6-801f-be5fcc6b6bb4", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ + "=== Таблица 1/20 (Индекс в train: 10476) ===\n", + 
"Оригинальная таблица: csv/204-csv/561.csv\n", + "Размер: 35 строк, 7 колонок\n", + "--------------------------------------------------\n", + "Оригинальная таблица:\n", + " Fence Name Jockey Age Handicap (st-lb) \\\n", + "0 22 What A Myth Paul Kelleway 9 11-4 \n", + "1 03 Rough Tweed Pat Buckley 12 10-7 \n", + "2 ? Kapeno David Mould 9 10-6 \n", + "3 06 Packed Home Tommy Carberry 11 10-3 \n", + "4 22 Greek Scholar Michael Scudamore 7 10-4 \n", + "5 ? Brown Diamond Frank Shortt 11 10-0 \n", + "6 06 Popham Down Willie Robinson 9 10-0 \n", + "7 ? Pontin-Go TM Jones 14 10-0 \n", + "8 15 Game Purston Paddy Cowley 8 10-0 \n", + "9 22 Supersweet Mr D Crossley-Cooke 9 10-6 \n", + "10 ? Major Hitch Paddy Broderick 8 10-1 \n", + "11 15 Irish Day Jimmy Magee 10 10-0 \n", + "12 ? Scottish Final J Gamble 9 10-0 \n", + "13 ? Flamecap F Carroll 9 10-0 \n", + "14 15 Black Spot Jeremy Speid-Soote 9 10-0 \n", + "15 ? Harry Black R Court 9 10-0 \n", + "16 ? Flying Wild Pat Taaffe 10 11-0 \n", + "17 ? Stirling HR Beasley 10 10-11 \n", + "18 ? Vultrix Stan Mellor 8 10-7 \n", + "19 ? Solimyth Mr John Lawrence 10 10-1 \n", + "20 ? April Rose Major Piers Bengough 11 10-7 \n", + "21 ? L'Empereur Beltrán Osorio 12 10-2 \n", + "22 14 Willow King Liam McLoughlin 11 10-0 \n", + "23 ? Royal Ryse T Hyde 8 10-0 \n", + "24 ? Mac's Flare R Langley 10 10-0 \n", + "25 ? King Pin Mr Tim Durant 10 10-11 \n", + "26 ? In Haste J Leech 8 10-3 \n", + "27 ? My Gift A Redmond 10 10-0 \n", + "28 ? Bold Biri J Lehane 10 10-0 \n", + "29 22 Valouis E Prendergast 7 10-0 \n", + "30 22 Leslie Mr JM Opperman 10 10-5 \n", + "31 ? Dorimont Mr W Shand-Kydd 12 10-0 \n", + "32 15 Fujino-O Jeff King 7 12-0 \n", + "33 ? 
Monarch's Thought G Cramp 12 10-0 \n", + "34 06 Groomsman Mr S Roberts 11 11-0 \n", + "\n", + " Starting price Fate \n", + "0 11/2 Fell \n", + "1 22/1 Fell \n", + "2 100/7 Fell \n", + "3 33/1 Fell \n", + "4 50/1 Fell \n", + "5 100/1 Fell \n", + "6 22/1 Fell \n", + "7 100/1 Fell \n", + "8 100/1 Fell \n", + "9 100/1 Fell \n", + "10 50/1 Fell \n", + "11 40/1 Fell \n", + "12 100/1 Fell \n", + "13 100/1 Fell \n", + "14 100/1 Fell \n", + "15 100/1 Fell \n", + "16 20/1 Pulled Up \n", + "17 28/1 Pulled Up \n", + "18 100/7 Pulled Up \n", + "19 100/1 Pulled Up \n", + "20 100/1 Pulled Up \n", + "21 100/1 Pulled Up \n", + "22 100/1 Pulled Up \n", + "23 100/1 Pulled Up \n", + "24 100/1 Pulled Up \n", + "25 100/1 Pulled Up \n", + "26 100/1 Pulled Up \n", + "27 100/1 Pulled Up \n", + "28 100/1 Pulled Up \n", + "29 50/1 Brought Down \n", + "30 100/1 Brought Down \n", + "31 50/1 Unseated Rider \n", + "32 100/1 Refused \n", + "33 100/1 Refused \n", + "34 100/1 Refused \n", + "Очищенная таблица:\n", + " Fence Name Jockey Age Handicap (st-lb) \\\n", + "0 22 What A Myth Paul Kelleway 9 11-4 \n", + "1 03 Rough Tweed Pat Buckley 12 10-7 \n", + "2 ? Kapeno David Mould 9 10-6 \n", + "3 06 Packed Home Tommy Carberry 11 10-3 \n", + "4 22 Greek Scholar Michael Scudamore 7 10-4 \n", + "5 ? Brown Diamond Frank Shortt 11 10-0 \n", + "6 06 Popham Down Willie Robinson 9 10-0 \n", + "7 ? Pontin-Go TM Jones 14 10-0 \n", + "8 15 Game Purston Paddy Cowley 8 10-0 \n", + "9 22 Supersweet Mr D Crossley-Cooke 9 10-6 \n", + "10 ? Major Hitch Paddy Broderick 8 10-1 \n", + "11 15 Irish Day Jimmy Magee 10 10-0 \n", + "12 ? Scottish Final J Gamble 9 10-0 \n", + "13 ? Flamecap F Carroll 9 10-0 \n", + "14 15 Black Spot Jeremy Speid-Soote 9 10-0 \n", + "15 ? Harry Black R Court 9 10-0 \n", + "16 ? Flying Wild Pat Taaffe 10 11-0 \n", + "17 ? Stirling HR Beasley 10 10-11 \n", + "18 ? Vultrix Stan Mellor 8 10-7 \n", + "19 ? Solimyth Mr John Lawrence 10 10-1 \n", + "20 ? 
April Rose Major Piers Bengough 11 10-7 \n", + "21 ? L'Empereur Beltrán Osorio 12 10-2 \n", + "22 14 Willow King Liam McLoughlin 11 10-0 \n", + "23 ? Royal Ryse T Hyde 8 10-0 \n", + "24 ? Mac's Flare R Langley 10 10-0 \n", + "25 ? King Pin Mr Tim Durant 10 10-11 \n", + "26 ? In Haste J Leech 8 10-3 \n", + "27 ? My Gift A Redmond 10 10-0 \n", + "28 ? Bold Biri J Lehane 10 10-0 \n", + "29 22 Valouis E Prendergast 7 10-0 \n", + "30 22 Leslie Mr JM Opperman 10 10-5 \n", + "31 ? Dorimont Mr W Shand-Kydd 12 10-0 \n", + "32 15 Fujino-O Jeff King 7 12-0 \n", + "33 ? Monarch's Thought G Cramp 12 10-0 \n", + "34 06 Groomsman Mr S Roberts 11 11-0 \n", + "\n", + " Starting price Fate Handicap (st-lb)_score1 \\\n", + "0 11/2 Fell 11 \n", + "1 22/1 Fell 10 \n", + "2 100/7 Fell 10 \n", + "3 33/1 Fell 10 \n", + "4 50/1 Fell 10 \n", + "5 100/1 Fell 10 \n", + "6 22/1 Fell 10 \n", + "7 100/1 Fell 10 \n", + "8 100/1 Fell 10 \n", + "9 100/1 Fell 10 \n", + "10 50/1 Fell 10 \n", + "11 40/1 Fell 10 \n", + "12 100/1 Fell 10 \n", + "13 100/1 Fell 10 \n", + "14 100/1 Fell 10 \n", + "15 100/1 Fell 10 \n", + "16 20/1 Pulled Up 11 \n", + "17 28/1 Pulled Up 10 \n", + "18 100/7 Pulled Up 10 \n", + "19 100/1 Pulled Up 10 \n", + "20 100/1 Pulled Up 10 \n", + "21 100/1 Pulled Up 10 \n", + "22 100/1 Pulled Up 10 \n", + "23 100/1 Pulled Up 10 \n", + "24 100/1 Pulled Up 10 \n", + "25 100/1 Pulled Up 10 \n", + "26 100/1 Pulled Up 10 \n", + "27 100/1 Pulled Up 10 \n", + "28 100/1 Pulled Up 10 \n", + "29 50/1 Brought Down 10 \n", + "30 100/1 Brought Down 10 \n", + "31 50/1 Unseated Rider 10 \n", + "32 100/1 Refused 12 \n", + "33 100/1 Refused 10 \n", + "34 100/1 Refused 11 \n", + "\n", + " Handicap (st-lb)_score2 \n", + "0 4 \n", + "1 7 \n", + "2 6 \n", + "3 3 \n", + "4 4 \n", + "5 0 \n", + "6 0 \n", + "7 0 \n", + "8 0 \n", + "9 6 \n", + "10 1 \n", + "11 0 \n", + "12 0 \n", + "13 0 \n", + "14 0 \n", + "15 0 \n", + "16 0 \n", + "17 11 \n", + "18 7 \n", + "19 1 \n", + "20 7 \n", + "21 2 \n", + "22 0 \n", + 
"23 0 \n", + "24 0 \n", + "25 11 \n", + "26 3 \n", + "27 0 \n", + "28 0 \n", + "29 0 \n", + "30 5 \n", + "31 0 \n", + "32 0 \n", + "33 0 \n", + "34 0 \n", + "\n", + "==================================================\n", + "\n", + "=== Таблица 2/20 (Индекс в train: 1824) ===\n", + "Оригинальная таблица: csv/203-csv/694.csv\n", + "Размер: 11 строк, 5 колонок\n", + "--------------------------------------------------\n", + "Оригинальная таблица:\n", + " # Name Took Office Left Office \\\n", + "0 1 Nejat Konuk, 2nd time 15 November 1983 19 July 1985 \n", + "1 2 Derviş Eroğlu, 1st time 19 July 1985 1 January 1994 \n", + "2 3 Hakkı Atun 1 January 1994 16 August 1996 \n", + "3 (2) Derviş Eroğlu, 2nd time 16 August 1996 13 January 2004 \n", + "4 4 Mehmet Ali Talat 13 January 2004 26 April 2005 \n", + "5 5 Ferdi Sabit Soyer 26 April 2005 5 May 2009 \n", + "6 (2) Derviş Eroğlu, 3rd time 5 May 2009 23 April 2010 \n", + "7 — Hüseyin Özgürgün (acting) 23 April 2010 17 May 2010 \n", + "8 6 İrsen Küçük 17 May 2010 13 June 2013 \n", + "9 7 Sibel Siber 13 June 2013 2 September 2013 \n", + "10 8 Özkan Yorgancıoğlu 2 September 2013 Incumbent \n", + "\n", + " Party \n", + "0 National Unity Party \n", + "1 National Unity Party \n", + "2 Democratic Party \n", + "3 National Unity Party \n", + "4 Republican Turkish Party \n", + "5 Republican Turkish Party \n", + "6 National Unity Party \n", + "7 National Unity Party \n", + "8 National Unity Party \n", + "9 Republican Turkish Party \n", + "10 Republican Turkish Party \n", + "Очищенная таблица:\n", + " # Name Took Office Left Office \\\n", + "0 1 Nejat Konuk, 2nd time 15 November 1983 19 July 1985 \n", + "1 2 Derviş Eroğlu, 1st time 19 July 1985 1 January 1994 \n", + "2 3 Hakkı Atun 1 January 1994 16 August 1996 \n", + "3 (2) Derviş Eroğlu, 2nd time 16 August 1996 13 January 2004 \n", + "4 4 Mehmet Ali Talat 13 January 2004 26 April 2005 \n", + "5 5 Ferdi Sabit Soyer 26 April 2005 5 May 2009 \n", + "6 (2) Derviş Eroğlu, 3rd time 5 May 2009 
23 April 2010 \n", + "7 NaN Hüseyin Özgürgün (acting) 23 April 2010 17 May 2010 \n", + "8 6 İrsen Küçük 17 May 2010 13 June 2013 \n", + "9 7 Sibel Siber 13 June 2013 2 September 2013 \n", + "10 8 Özkan Yorgancıoğlu 2 September 2013 Incumbent \n", + "\n", + " Party \n", + "0 National Unity Party \n", + "1 National Unity Party \n", + "2 Democratic Party \n", + "3 National Unity Party \n", + "4 Republican Turkish Party \n", + "5 Republican Turkish Party \n", + "6 National Unity Party \n", + "7 National Unity Party \n", + "8 National Unity Party \n", + "9 Republican Turkish Party \n", + "10 Republican Turkish Party \n", + "\n", + "==================================================\n", + "\n", + "=== Таблица 3/20 (Индекс в train: 409) ===\n", + "Оригинальная таблица: csv/204-csv/864.csv\n", + "Размер: 24 строк, 7 колонок\n", + "--------------------------------------------------\n", + "Оригинальная таблица:\n", + " Pos Class No Team \\\n", + "0 32 S\\n2.0 27 Société ROC \n", + "1 33 S\\n2.0 29 Société ROC \n", + "2 34 S\\n3.0 3 Christian Poirot \n", + "3 35 GT 59 Gelo Racing Team \n", + "4 36 S\\n3.0 1 Wicky Racing Team \n", + "5 37 S\\n3.0 6 Gitanes Automobiles Ligier \n", + "6 38 GT 52 Écurie du Nord \n", + "7 39 T 98 Auto Mazda Claude Bouchet \n", + "8 40 GT 57 Ganto Racing \n", + "9 41 GT 60 Gelo Racing Team \n", + "10 42 GT 68 Guy Verrier \n", + "11 43 GT 7 Beurlys International Auto \n", + "12 44 T 93 Hervé Poulain \n", + "13 45 S\\n2.0 26 Elf Switzerland \n", + "14 46 S\\n3.0 18 Sigma Automotive Co. Ltd. \n", + "15 47 GT 16 Joest Racing / Tebernum \n", + "16 48 GT 96 Bonnemaison - Thiaw \n", + "17 49 T 95 Shark Team \n", + "18 50 S\\n3.0 97 Gitanes Automobiles Ligier \n", + "19 51 S\\n3.0 12 Racing Team Schulthess \n", + "20 52 S\\n2.0 40 Philippe Mettetal \n", + "21 53 GT\\nSer. 83 Jean-Yves Gadal \n", + "22 54 T 90 Jean-Claude Aubriet \n", + "23 55 GT 42 Henri Greder \n", + "\n", + " Drivers \\\n", + "0 Laurent Ferrier\\n Xavier Lapeyre\\n Christian E... 
\n", + "1 Pierre-Marie Painvin\\n Franz Hummel \n", + "2 Christian Poirot\\n Gérard Cuynet\\n Guillermo O... \n", + "3 Tim Schenken\\n Howden Ganley \n", + "4 Max Cohen-Olivar\\n Philippe Coran\\n Joël Brachet \n", + "5 Henri Pescarolo\\n François Migault \n", + "6 William Vollery\\n Roger Dorchy\\n Eric Chapuis \n", + "7 Claude Bouchet\\n Jean Rondeau \n", + "8 John Rulon-Miller\\n Tom Waugh\\n Serge Godard \n", + "9 Toine Hezemans\\n Manfred Schurti \n", + "10 Guy Verrier\\n Florian Vetsch\\n Jean-Robert Cor... \n", + "11 Pietro Polese\\n \\Willer\\\"\" \n", + "12 Hervé Poulain\\n Sam Posey\\n Jean Guichet \n", + "13 Marie-Claude Charmasson\\n Lella Lombardi \n", + "14 Hiroshi Fushida\\n Harukuni Takahashi \n", + "15 Clemens Schickentanz\\n Hartwig Bertrams \n", + "16 Lucien Nageotte\\n Gérard Picard \n", + "17 Jean-Claude Guérie\\n Dominique Fornage \n", + "18 Jean-Pierre Beltoise\\n Jean-Pierre Jarier \n", + "19 Hervé Bayard\\n Heinz Schulthess \n", + "20 Jean Ragnotti\\n Michel Lateste \n", + "21 Jean-Yves Gadal\\n \\Ségolen\\\"\" \n", + "22 Jean-Claude Aubriet\\n \\Depnic\\\"\" \n", + "23 Henri Greder\\n Alain Cudini \n", + "\n", + " Chassis Engine \n", + "0 Lola T294 ROC-Simca 2.0L I4 \n", + "1 Lola T292 ROC-Simca 2.0L I4 \n", + "2 Porsche 908/2 Porsche 3.0L Flat-8 \n", + "3 Porsche 911 Carrera RSR Porsche 3.0L Flat-6 \n", + "4 Porsche 908/2 Porsche 3.0L Flat-8 \n", + "5 Ligier JS2 Ford Cosworth DFV 3.0L V8 \n", + "6 Porsche 911 Carrera RSR Porsche 3.0L Flat-6 \n", + "7 Mazda RX-3 Mazda 12A 1.2L 2-Rotor \n", + "8 Porsche 911 Carrera RSR Porsche 3.0L Flat-6 \n", + "9 Porsche 911 Carrera RSR Porsche 3.0L Flat-6 \n", + "10 Porsche 911 Carrera RS Porsche 3.0L Flat-6 \n", + "11 De Tomaso Pantera Ford 5.8L V8 \n", + "12 BMW 3.0CSL BMW 3.5L I6 \n", + "13 Renault-Alpine A441 Renault 2.0L V6 \n", + "14 Sigma MC75 Toyota 2.3L Turbo I4 \n", + "15 Porsche 911 Carrera RSR Porsche 3.0L Flat-6 \n", + "16 Porsche 911 Carrera RSR Porsche 3.0L Flat-6 \n", + "17 Ford Capri RS 
Ford 3.0L V6 \n", + "18 Ligier JS2 Maserati 3.0L V6 \n", + "19 Lola T284 Ford Cosworth DFV 3.0L V8 \n", + "20 Tecma 755 Ford 1.8L I4 \n", + "21 Porsche 911 Carrera RS Porsche 2.6L Flat-6 \n", + "22 BMW 3.0CSL BMW 3.5L I6 \n", + "23 Chevrolet Corvette Chevrolet 7.0L V8 \n", + "Очищенная таблица:\n", + " Pos Class No Team \\\n", + "0 32 S\\n2.0 27 Société ROC \n", + "1 33 S\\n2.0 29 Société ROC \n", + "2 34 S\\n3.0 3 Christian Poirot \n", + "3 35 GT 59 Gelo Racing Team \n", + "4 36 S\\n3.0 1 Wicky Racing Team \n", + "5 37 S\\n3.0 6 Gitanes Automobiles Ligier \n", + "6 38 GT 52 Écurie du Nord \n", + "7 39 T 98 Auto Mazda Claude Bouchet \n", + "8 40 GT 57 Ganto Racing \n", + "9 41 GT 60 Gelo Racing Team \n", + "10 42 GT 68 Guy Verrier \n", + "11 43 GT 7 Beurlys International Auto \n", + "12 44 T 93 Hervé Poulain \n", + "13 45 S\\n2.0 26 Elf Switzerland \n", + "14 46 S\\n3.0 18 Sigma Automotive Co. Ltd. \n", + "15 47 GT 16 Joest Racing / Tebernum \n", + "16 48 GT 96 Bonnemaison - Thiaw \n", + "17 49 T 95 Shark Team \n", + "18 50 S\\n3.0 97 Gitanes Automobiles Ligier \n", + "19 51 S\\n3.0 12 Racing Team Schulthess \n", + "20 52 S\\n2.0 40 Philippe Mettetal \n", + "21 53 GT\\nSer. 83 Jean-Yves Gadal \n", + "22 54 T 90 Jean-Claude Aubriet \n", + "23 55 GT 42 Henri Greder \n", + "\n", + " Drivers \\\n", + "0 Laurent Ferrier\\n Xavier Lapeyre\\n Christian E... \n", + "1 Pierre-Marie Painvin\\n Franz Hummel \n", + "2 Christian Poirot\\n Gérard Cuynet\\n Guillermo O... \n", + "3 Tim Schenken\\n Howden Ganley \n", + "4 Max Cohen-Olivar\\n Philippe Coran\\n Joël Brachet \n", + "5 Henri Pescarolo\\n François Migault \n", + "6 William Vollery\\n Roger Dorchy\\n Eric Chapuis \n", + "7 Claude Bouchet\\n Jean Rondeau \n", + "8 John Rulon-Miller\\n Tom Waugh\\n Serge Godard \n", + "9 Toine Hezemans\\n Manfred Schurti \n", + "10 Guy Verrier\\n Florian Vetsch\\n Jean-Robert Cor... 
\n", + "11 Pietro Polese\\n \\Willer\\\"\" \n", + "12 Hervé Poulain\\n Sam Posey\\n Jean Guichet \n", + "13 Marie-Claude Charmasson\\n Lella Lombardi \n", + "14 Hiroshi Fushida\\n Harukuni Takahashi \n", + "15 Clemens Schickentanz\\n Hartwig Bertrams \n", + "16 Lucien Nageotte\\n Gérard Picard \n", + "17 Jean-Claude Guérie\\n Dominique Fornage \n", + "18 Jean-Pierre Beltoise\\n Jean-Pierre Jarier \n", + "19 Hervé Bayard\\n Heinz Schulthess \n", + "20 Jean Ragnotti\\n Michel Lateste \n", + "21 Jean-Yves Gadal\\n \\Ségolen\\\"\" \n", + "22 Jean-Claude Aubriet\\n \\Depnic\\\"\" \n", + "23 Henri Greder\\n Alain Cudini \n", + "\n", + " Chassis Engine \n", + "0 Lola T294 ROC-Simca 2.0L I4 \n", + "1 Lola T292 ROC-Simca 2.0L I4 \n", + "2 Porsche 908/2 Porsche 3.0L Flat-8 \n", + "3 Porsche 911 Carrera RSR Porsche 3.0L Flat-6 \n", + "4 Porsche 908/2 Porsche 3.0L Flat-8 \n", + "5 Ligier JS2 Ford Cosworth DFV 3.0L V8 \n", + "6 Porsche 911 Carrera RSR Porsche 3.0L Flat-6 \n", + "7 Mazda RX-3 Mazda 12A 1.2L 2-Rotor \n", + "8 Porsche 911 Carrera RSR Porsche 3.0L Flat-6 \n", + "9 Porsche 911 Carrera RSR Porsche 3.0L Flat-6 \n", + "10 Porsche 911 Carrera RS Porsche 3.0L Flat-6 \n", + "11 De Tomaso Pantera Ford 5.8L V8 \n", + "12 BMW 3.0CSL BMW 3.5L I6 \n", + "13 Renault-Alpine A441 Renault 2.0L V6 \n", + "14 Sigma MC75 Toyota 2.3L Turbo I4 \n", + "15 Porsche 911 Carrera RSR Porsche 3.0L Flat-6 \n", + "16 Porsche 911 Carrera RSR Porsche 3.0L Flat-6 \n", + "17 Ford Capri RS Ford 3.0L V6 \n", + "18 Ligier JS2 Maserati 3.0L V6 \n", + "19 Lola T284 Ford Cosworth DFV 3.0L V8 \n", + "20 Tecma 755 Ford 1.8L I4 \n", + "21 Porsche 911 Carrera RS Porsche 2.6L Flat-6 \n", + "22 BMW 3.0CSL BMW 3.5L I6 \n", + "23 Chevrolet Corvette Chevrolet 7.0L V8 \n", + "\n", + "==================================================\n", + "\n", + "=== Таблица 4/20 (Индекс в train: 12149) ===\n", + "Оригинальная таблица: csv/203-csv/56.csv\n", + "Размер: 19 строк, 8 колонок\n", + 
"--------------------------------------------------\n", + "Оригинальная таблица:\n", + " Star Start\\nyear End\\nyear Maximum\\nyear \\\n", + "0 Epsilon Canis Majoris ... -4,460,000 -4,700,000 \n", + "1 Beta Canis Majoris -4,460,000 -3,700,000 -4,420,000 \n", + "2 Canopus (first time) -3,700,000 -1,370,000 -3,110,000 \n", + "3 Zeta Sagittarii -1,370,000 -1,080,000 -1,200,000 \n", + "4 Zeta Leporis -1,080,000 -950,000 -1,050,000 \n", + "5 Canopus (second time) -950,000 -420,000 -950,000 \n", + "6 Aldebaran -420,000 -210,000 -320,000 \n", + "7 Capella -210,000 -160,000 -240,000 \n", + "8 Canopus (third time) -160,000 -90,000 -160,000 \n", + "9 Sirius (current) -90,000 +210,000 +60,000 \n", + "10 Vega +210,000 +480,000 +290,000 \n", + "11 Canopus (fourth time) +480,000 +990,000 +480,000 \n", + "12 Beta Aurigae +990,000 +1,150,000 +1,190,000 \n", + "13 Delta Scuti +1,150,000 +1,330,000 +1,250,000 \n", + "14 Gamma Draconis +1,330,000 +2,030,000 +1,550,000 \n", + "15 Upsilon Librae +2,030,000 +2,670,000 +2,290,000 \n", + "16 NR Canis Majoris +2,670,000 +3,050,000 +2,870,000 \n", + "17 Omicron Herculis +3,050,000 +3,870,000 +3,470,000 \n", + "18 Beta Cygni +3,870,000 ... 
+4,610,000 \n", + "\n", + " Maximum\\nmagnitude Distance at\\nmaximum (LY) Current\\ndistance \\\n", + "0 -3.99 34.0 430.00 \n", + "1 -3.65 37.0 500.00 \n", + "2 -1.86 177.0 310.00 \n", + "3 -2.74 8.0 89.10 \n", + "4 -2.05 5.3 70.00 \n", + "5 -1.09 252.0 310.00 \n", + "6 -1.54 21.5 65.00 \n", + "7 -0.82 27.9 42.20 \n", + "8 -0.70 302.0 310.00 \n", + "9 -1.64 7.8 8.60 \n", + "10 -0.81 17.2 25.04 \n", + "11 -0.40 346.0 310.00 \n", + "12 -0.40 28.5 82.10 \n", + "13 -1.84 9.2 187.00 \n", + "14 -1.39 27.7 154.00 \n", + "15 -0.46 30.0 195.00 \n", + "16 -0.88 14.0 280.00 \n", + "17 -0.63 44.0 346.00 \n", + "18 -0.52 80.0 390.00 \n", + "\n", + " Current\\nmagnitude \n", + "0 1.50 \n", + "1 1.99 \n", + "2 -0.72 \n", + "3 2.60 \n", + "4 3.55 \n", + "5 -0.72 \n", + "6 0.85 \n", + "7 0.08 \n", + "8 -0.72 \n", + "9 -1.46 \n", + "10 0.03 \n", + "11 -0.72 \n", + "12 1.90 \n", + "13 4.72 \n", + "14 2.36 \n", + "15 3.60 \n", + "16 5.60 \n", + "17 3.83 \n", + "18 3.18 \n", + "Очищенная таблица:\n", + " Star Start year End year Maximum year \\\n", + "0 Epsilon Canis Majoris NaN -4460000.0 -4700000 \n", + "1 Beta Canis Majoris -4460000.0 -3700000.0 -4420000 \n", + "2 Canopus (first time) -3700000.0 -1370000.0 -3110000 \n", + "3 Zeta Sagittarii -1370000.0 -1080000.0 -1200000 \n", + "4 Zeta Leporis -1080000.0 -950000.0 -1050000 \n", + "5 Canopus (second time) -950000.0 -420000.0 -950000 \n", + "6 Aldebaran -420000.0 -210000.0 -320000 \n", + "7 Capella -210000.0 -160000.0 -240000 \n", + "8 Canopus (third time) -160000.0 -90000.0 -160000 \n", + "9 Sirius (current) -90000.0 210000.0 60000 \n", + "10 Vega 210000.0 480000.0 290000 \n", + "11 Canopus (fourth time) 480000.0 990000.0 480000 \n", + "12 Beta Aurigae 990000.0 1150000.0 1190000 \n", + "13 Delta Scuti 1150000.0 1330000.0 1250000 \n", + "14 Gamma Draconis 1330000.0 2030000.0 1550000 \n", + "15 Upsilon Librae 2030000.0 2670000.0 2290000 \n", + "16 NR Canis Majoris 2670000.0 3050000.0 2870000 \n", + "17 Omicron Herculis 3050000.0 
3870000.0 3470000 \n", + "18 Beta Cygni 3870000.0 NaN 4610000 \n", + "\n", + " Maximum magnitude Distance at maximum (LY) Current distance \\\n", + "0 -3.99 34.0 430.00 \n", + "1 -3.65 37.0 500.00 \n", + "2 -1.86 177.0 310.00 \n", + "3 -2.74 8.0 89.10 \n", + "4 -2.05 5.3 70.00 \n", + "5 -1.09 252.0 310.00 \n", + "6 -1.54 21.5 65.00 \n", + "7 -0.82 27.9 42.20 \n", + "8 -0.70 302.0 310.00 \n", + "9 -1.64 7.8 8.60 \n", + "10 -0.81 17.2 25.04 \n", + "11 -0.40 346.0 310.00 \n", + "12 -0.40 28.5 82.10 \n", + "13 -1.84 9.2 187.00 \n", + "14 -1.39 27.7 154.00 \n", + "15 -0.46 30.0 195.00 \n", + "16 -0.88 14.0 280.00 \n", + "17 -0.63 44.0 346.00 \n", + "18 -0.52 80.0 390.00 \n", + "\n", + " Current magnitude \n", + "0 1.50 \n", + "1 1.99 \n", + "2 -0.72 \n", + "3 2.60 \n", + "4 3.55 \n", + "5 -0.72 \n", + "6 0.85 \n", + "7 0.08 \n", + "8 -0.72 \n", + "9 -1.46 \n", + "10 0.03 \n", + "11 -0.72 \n", + "12 1.90 \n", + "13 4.72 \n", + "14 2.36 \n", + "15 3.60 \n", + "16 5.60 \n", + "17 3.83 \n", + "18 3.18 \n", + "\n", + "==================================================\n", + "\n", + "=== Таблица 5/20 (Индекс в train: 4506) ===\n", + "Оригинальная таблица: csv/203-csv/694.csv\n", + "Размер: 11 строк, 5 колонок\n", + "--------------------------------------------------\n", + "Оригинальная таблица:\n", + " # Name Took Office Left Office \\\n", + "0 1 Nejat Konuk, 2nd time 15 November 1983 19 July 1985 \n", + "1 2 Derviş Eroğlu, 1st time 19 July 1985 1 January 1994 \n", + "2 3 Hakkı Atun 1 January 1994 16 August 1996 \n", + "3 (2) Derviş Eroğlu, 2nd time 16 August 1996 13 January 2004 \n", + "4 4 Mehmet Ali Talat 13 January 2004 26 April 2005 \n", + "5 5 Ferdi Sabit Soyer 26 April 2005 5 May 2009 \n", + "6 (2) Derviş Eroğlu, 3rd time 5 May 2009 23 April 2010 \n", + "7 — Hüseyin Özgürgün (acting) 23 April 2010 17 May 2010 \n", + "8 6 İrsen Küçük 17 May 2010 13 June 2013 \n", + "9 7 Sibel Siber 13 June 2013 2 September 2013 \n", + "10 8 Özkan Yorgancıoğlu 2 September 2013 Incumbent 
\n", + "\n", + " Party \n", + "0 National Unity Party \n", + "1 National Unity Party \n", + "2 Democratic Party \n", + "3 National Unity Party \n", + "4 Republican Turkish Party \n", + "5 Republican Turkish Party \n", + "6 National Unity Party \n", + "7 National Unity Party \n", + "8 National Unity Party \n", + "9 Republican Turkish Party \n", + "10 Republican Turkish Party \n", + "Очищенная таблица:\n", + " # Name Took Office Left Office \\\n", + "0 1 Nejat Konuk, 2nd time 15 November 1983 19 July 1985 \n", + "1 2 Derviş Eroğlu, 1st time 19 July 1985 1 January 1994 \n", + "2 3 Hakkı Atun 1 January 1994 16 August 1996 \n", + "3 (2) Derviş Eroğlu, 2nd time 16 August 1996 13 January 2004 \n", + "4 4 Mehmet Ali Talat 13 January 2004 26 April 2005 \n", + "5 5 Ferdi Sabit Soyer 26 April 2005 5 May 2009 \n", + "6 (2) Derviş Eroğlu, 3rd time 5 May 2009 23 April 2010 \n", + "7 NaN Hüseyin Özgürgün (acting) 23 April 2010 17 May 2010 \n", + "8 6 İrsen Küçük 17 May 2010 13 June 2013 \n", + "9 7 Sibel Siber 13 June 2013 2 September 2013 \n", + "10 8 Özkan Yorgancıoğlu 2 September 2013 Incumbent \n", "\n", - "df[df['Position'] == '1st']['Venue'].iloc[-1]\n", + " Party \n", + "0 National Unity Party \n", + "1 National Unity Party \n", + "2 Democratic Party \n", + "3 National Unity Party \n", + "4 Republican Turkish Party \n", + "5 Republican Turkish Party \n", + "6 National Unity Party \n", + "7 National Unity Party \n", + "8 National Unity Party \n", + "9 Republican Turkish Party \n", + "10 Republican Turkish Party \n", "\n", + "==================================================\n", "\n", - "df['Opponent'][0]\n", + "=== Таблица 6/20 (Индекс в train: 4012) ===\n", + "Оригинальная таблица: csv/203-csv/855.csv\n", + "Размер: 8 строк, 6 колонок\n", + "--------------------------------------------------\n", + "Оригинальная таблица:\n", + " Date City Venue \\\n", + "0 15 January Dublin RTÉ Studios \n", + "1 (2) \\\"We Are the Champions\\\"\" The Late Late Show NaN \n", + "2 31 January 
Dublin The O2 \n", + "3 10 May London Dominion Theatre \n", + "4 3 September Utrecht Beatrix\\nTheatre \n", + "5 4 September London Dominion Theatre \n", + "6 12 September Stockholm Cirkus Arena Restaurang \n", + "7 21 October Berlin Theater des Westens \n", "\n", + " Member Performance \\\n", + "0 Brian & Roger (1) \\We Will Rock You\\\" \n", + "1 NaN NaN \n", + "2 Brian (1) \\Bohemian Rhapsody\\\"\" \n", + "3 Brian & Roger (1) \\Bohemian Rhapsody\\\"\" \n", + "4 Brian (1) \\Bohemian Rhapsody\\\"\" \n", + "5 Brian (1) \\Bohemian Rhapsody\\\"\" \n", + "6 Brian (1) \\Bohemian Rhapsody\\\"\" \n", + "7 Brian (1) \\Bohemian Rhapsody\\\"\" \n", "\n", + " Notes \n", + "0 NaN \n", + "1 NaN \n", + "2 Matinee and Evening performances. Roger attend... \n", + "3 8th anniversary. \n", + "4 NaN \n", + "5 Matinee and Evening performances \n", + "6 NaN \n", + "7 NaN \n", + "Очищенная таблица:\n", + " Date City Venue \\\n", + "0 15 January Dublin RTÉ Studios \n", + "1 (2) \\\"We Are the Champions\\\"\" The Late Late Show NaN \n", + "2 31 January Dublin The O2 \n", + "3 10 May London Dominion Theatre \n", + "4 3 September Utrecht Beatrix\\nTheatre \n", + "5 4 September London Dominion Theatre \n", + "6 12 September Stockholm Cirkus Arena Restaurang \n", + "7 21 October Berlin Theater des Westens \n", "\n", + " Member Performance \\\n", + "0 Brian & Roger (1) \\We Will Rock You\\\" \n", + "1 NaN NaN \n", + "2 Brian (1) \\Bohemian Rhapsody\\\"\" \n", + "3 Brian & Roger (1) \\Bohemian Rhapsody\\\"\" \n", + "4 Brian (1) \\Bohemian Rhapsody\\\"\" \n", + "5 Brian (1) \\Bohemian Rhapsody\\\"\" \n", + "6 Brian (1) \\Bohemian Rhapsody\\\"\" \n", + "7 Brian (1) \\Bohemian Rhapsody\\\"\" \n", + "\n", + " Notes Date_datetime \n", + "0 NaN 1-01-15 \n", + "1 NaN NaT \n", + "2 Matinee and Evening performances. Roger attend... 1-01-31 \n", + "3 8th anniversary. 
1-05-10 \n", + "4 NaN 1-09-03 \n", + "5 Matinee and Evening performances 1-09-04 \n", + "6 NaN 1-09-12 \n", + "7 NaN 1-10-21 \n", + "\n", + "==================================================\n", + "\n", + "=== Таблица 7/20 (Индекс в train: 3657) ===\n", + "Оригинальная таблица: csv/203-csv/788.csv\n", + "Размер: 18 строк, 5 колонок\n", + "--------------------------------------------------\n", + "Оригинальная таблица:\n", + " Year Author Illustrator \\\n", + "0 1982 Angela Carter\\n(ed. and translator) Michael Foreman \n", + "1 1983 Anthony Browne Browne \n", + "2 1984 John Burningham Burningham \n", + "3 1985 Ted Hughes (1968) Andrew Davidson \n", + "4 1986 Allan Ahlberg Janet Ahlberg \n", + "5 1987 Charles Causley Charles Keeping \n", + "6 1988 Lewis Carroll (1865) Anthony Browne \n", + "7 1989 Martin Waddell Barbara Firth \n", + "8 1990 Quentin Blake Blake \n", + "9 1991 Colin McNaughton McNaughton \n", + "10 1992 Raymond Briggs Briggs \n", + "11 1993 Karen Wallace Mike Bostock \n", + "12 1994 Trish Cooke Helen Oxenbury \n", + "13 1995 Kathy Henderson Patrick Benson \n", + "14 1996 Babette Cole Cole \n", + "15 1997 William Mayne Jonathan Heale \n", + "16 1998 Anthony Browne Browne \n", + "17 1999 Lewis Carroll (1865) Helen Oxenbury \n", + "\n", + " Title Publisher \n", + "0 Sleeping Beauty and other favourite fairy tales V. Gollancz \n", + "1 Gorilla Julia MacRae \n", + "2 Granpa J. Cape \n", + "3 The Iron Man Faber \n", + "4 The Jolly Postman Heinemann \n", + "5 Jack the Treacle Eater Macmillan \n", + "6 Alice's Adventures in Wonderland Julia MacRae \n", + "7 The Park in the Dark Walker \n", + "8 All Join In J. Cape \n", + "9 Have You Seen who's just moved in next door to... Walker \n", + "10 The Man Julia MacRae \n", + "11 Think of an Eel Walker \n", + "12 So Much Walker \n", + "13 The Little Boat Walker \n", + "14 Drop Dead J. 
Cape \n", + "15 Lady Muck Heinemann \n", + "16 Voices in the Park Doubleday \n", + "17 Alice's Adventures in Wonderland Walker \n", + "Очищенная таблица:\n", + " Year Author Illustrator \\\n", + "0 1982 Angela Carter\\n(ed. and translator) Michael Foreman \n", + "1 1983 Anthony Browne Browne \n", + "2 1984 John Burningham Burningham \n", + "3 1985 Ted Hughes (1968) Andrew Davidson \n", + "4 1986 Allan Ahlberg Janet Ahlberg \n", + "5 1987 Charles Causley Charles Keeping \n", + "6 1988 Lewis Carroll (1865) Anthony Browne \n", + "7 1989 Martin Waddell Barbara Firth \n", + "8 1990 Quentin Blake Blake \n", + "9 1991 Colin McNaughton McNaughton \n", + "10 1992 Raymond Briggs Briggs \n", + "11 1993 Karen Wallace Mike Bostock \n", + "12 1994 Trish Cooke Helen Oxenbury \n", + "13 1995 Kathy Henderson Patrick Benson \n", + "14 1996 Babette Cole Cole \n", + "15 1997 William Mayne Jonathan Heale \n", + "16 1998 Anthony Browne Browne \n", + "17 1999 Lewis Carroll (1865) Helen Oxenbury \n", + "\n", + " Title Publisher \n", + "0 Sleeping Beauty and other favourite fairy tales V. Gollancz \n", + "1 Gorilla Julia MacRae \n", + "2 Granpa J. Cape \n", + "3 The Iron Man Faber \n", + "4 The Jolly Postman Heinemann \n", + "5 Jack the Treacle Eater Macmillan \n", + "6 Alice's Adventures in Wonderland Julia MacRae \n", + "7 The Park in the Dark Walker \n", + "8 All Join In J. Cape \n", + "9 Have You Seen who's just moved in next door to... Walker \n", + "10 The Man Julia MacRae \n", + "11 Think of an Eel Walker \n", + "12 So Much Walker \n", + "13 The Little Boat Walker \n", + "14 Drop Dead J. 
Cape \n", + "15 Lady Muck Heinemann \n", + "16 Voices in the Park Doubleday \n", + "17 Alice's Adventures in Wonderland Walker \n", + "\n", + "==================================================\n", + "\n", + "=== Таблица 8/20 (Индекс в train: 2286) ===\n", + "Оригинальная таблица: csv/203-csv/352.csv\n", + "Размер: 16 строк, 5 колонок\n", + "--------------------------------------------------\n", + "Оригинальная таблица:\n", + " Position Swara Short name Notation Mnemonic\n", + "0 1 Shadja Sa S sa\n", + "1 2 Shuddha Rishabha Ri R1 ra\n", + "2 3 Chatushruti Rishabha Ri R2 ri\n", + "3 3 Shuddha Gandhara Ga G1 ga\n", + "4 4 Shatshruti Rishabha Ri R3 ru\n", + "5 4 Sadharana Gandhara Ga G2 gi\n", + "6 5 Antara Gandhara Ga G3 gu\n", + "7 6 Shuddha Madhyama Ma M1 ma\n", + "8 7 Prati Madhyama Ma M2 mi\n", + "9 8 Panchama Pa P pa\n", + "10 9 Shuddha Dhaivata Dha D1 dha\n", + "11 10 Chatushruti Dhaivata Dha D2 dhi\n", + "12 10 Shuddha Nishada Ni N1 na\n", + "13 11 Shatshruti Dhaivata Dha D3 dhu\n", + "14 11 Kaisiki Nishada Ni N2 ni\n", + "15 12 Kakali Nishada Ni N3 nu\n", + "Очищенная таблица:\n", + " Position Swara Short name Notation Mnemonic\n", + "0 1 Shadja Sa S sa\n", + "1 2 Shuddha Rishabha Ri R1 ra\n", + "2 3 Chatushruti Rishabha Ri R2 ri\n", + "3 3 Shuddha Gandhara Ga G1 ga\n", + "4 4 Shatshruti Rishabha Ri R3 ru\n", + "5 4 Sadharana Gandhara Ga G2 gi\n", + "6 5 Antara Gandhara Ga G3 gu\n", + "7 6 Shuddha Madhyama Ma M1 ma\n", + "8 7 Prati Madhyama Ma M2 mi\n", + "9 8 Panchama Pa P pa\n", + "10 9 Shuddha Dhaivata Dha D1 dha\n", + "11 10 Chatushruti Dhaivata Dha D2 dhi\n", + "12 10 Shuddha Nishada Ni N1 na\n", + "13 11 Shatshruti Dhaivata Dha D3 dhu\n", + "14 11 Kaisiki Nishada Ni N2 ni\n", + "15 12 Kakali Nishada Ni N3 nu\n", + "\n", + "==================================================\n", + "\n", + "=== Таблица 9/20 (Индекс в train: 12066) ===\n", + "Оригинальная таблица: csv/204-csv/480.csv\n", + "Размер: 32 строк, 6 колонок\n", + 
"--------------------------------------------------\n", + "Оригинальная таблица:\n", + " Title Alternate Title(s) Year \\\n", + "0 Ocean Hunter, The — 1998.0 \n", + "1 Off Road Challenge — 1997.0 \n", + "2 Off Road Thunder — 2000.0 \n", + "3 Off The Wall — 1991.0 \n", + "4 Oh My God! — 1993.0 \n", + "5 Oishii Puzzle Wa Irimasen Ka — 1993.0 \n", + "6 Oli-Boo-Chu — 1982.0 \n", + "7 Ollie King — 2004.0 \n", + "8 Omega Fighter — 1989.0 \n", + "9 Omega Race — 1982.0 \n", + "10 One Shot One Kill — NaN \n", + "11 Onna Sansirou - Typhoon Gal — 1985.0 \n", + "12 Operation Thunderbolt — 1988.0 \n", + "13 Operation Tiger — 1998.0 \n", + "14 Operation Wolf — 1987.0 \n", + "15 Operation Wolf 3 — 1994.0 \n", + "16 Orbit — 1978.0 \n", + "17 Ordyne — 1988.0 \n", + "18 Oriental Legend — 1997.0 \n", + "19 Osman Cannon Dancer 1996.0 \n", + "20 Otomedius — 2007.0 \n", + "21 Outfoxies, The — 1995.0 \n", + "22 Outlaw — 1976.0 \n", + "23 OutRun — 1986.0 \n", + "24 Out Zone — 1990.0 \n", + "25 OutRun 2 — 2003.0 \n", + "26 OutRunners — 1993.0 \n", + "27 Outtrigger — 1999.0 \n", + "28 Over Drive — 1990.0 \n", + "29 Over Rev — 1997.0 \n", + "30 Over Top — 1996.0 \n", + "31 Ozma Wars — 1978.0 \n", + "\n", + " Manufacturer Genre(s) Max. 
Players \n", + "0 Sega Shooting gallery 2.0 \n", + "1 Midway Racing 1.0 \n", + "2 Midway Racing 1.0 \n", + "3 Atari Games Breakout 2.0 \n", + "4 Atlus Puzzle 2.0 \n", + "5 Sunsoft Puzzle 2.0 \n", + "6 Irem NaN 2.0 \n", + "7 Smilebit NaN NaN \n", + "8 UPL Scrolling shooter 2.0 \n", + "9 Midway Multidirectional shooter 1.0 \n", + "10 Playmark Shooting gallery 2.0 \n", + "11 Taito NaN 2.0 \n", + "12 Taito Shooting gallery 2.0 \n", + "13 Taito Shooting gallery 2.0 \n", + "14 Taito Shooting gallery 2.0 \n", + "15 Taito Shooting gallery 2.0 \n", + "16 Atari Multi-directional shooter 2.0 \n", + "17 Namco Scrolling shooter 2.0 \n", + "18 International Game System Beat 'em up 4.0 \n", + "19 Mitchell Corporation Platformer 1.0 \n", + "20 Konami Scrolling shooter NaN \n", + "21 Namco Shoot'em up/\\nFighting game 2.0 \n", + "22 Atari Shooter 1.0 \n", + "23 Sega AM2 Racing 1.0 \n", + "24 Toaplan Scrolling shooter 2.0 \n", + "25 Sega AM2 Racing 1.0 \n", + "26 Sega AM1 Racing 2.0 \n", + "27 Sega AM2 First Person Shooter NaN \n", + "28 Konami Racing 1.0 \n", + "29 Jaleco Racing 2.0 \n", + "30 Alpha Denshi Racing 2.0 \n", + "31 SNK Fixed shooter 2.0 \n", + "Очищенная таблица:\n", + " Title Alternate Title(s) Year \\\n", + "0 Ocean Hunter, The NaN 1998.0 \n", + "1 Off Road Challenge NaN 1997.0 \n", + "2 Off Road Thunder NaN 2000.0 \n", + "3 Off The Wall NaN 1991.0 \n", + "4 Oh My God! 
NaN 1993.0 \n", + "5 Oishii Puzzle Wa Irimasen Ka NaN 1993.0 \n", + "6 Oli-Boo-Chu NaN 1982.0 \n", + "7 Ollie King NaN 2004.0 \n", + "8 Omega Fighter NaN 1989.0 \n", + "9 Omega Race NaN 1982.0 \n", + "10 One Shot One Kill NaN NaN \n", + "11 Onna Sansirou - Typhoon Gal NaN 1985.0 \n", + "12 Operation Thunderbolt NaN 1988.0 \n", + "13 Operation Tiger NaN 1998.0 \n", + "14 Operation Wolf NaN 1987.0 \n", + "15 Operation Wolf 3 NaN 1994.0 \n", + "16 Orbit NaN 1978.0 \n", + "17 Ordyne NaN 1988.0 \n", + "18 Oriental Legend NaN 1997.0 \n", + "19 Osman Cannon Dancer 1996.0 \n", + "20 Otomedius NaN 2007.0 \n", + "21 Outfoxies, The NaN 1995.0 \n", + "22 Outlaw NaN 1976.0 \n", + "23 OutRun NaN 1986.0 \n", + "24 Out Zone NaN 1990.0 \n", + "25 OutRun 2 NaN 2003.0 \n", + "26 OutRunners NaN 1993.0 \n", + "27 Outtrigger NaN 1999.0 \n", + "28 Over Drive NaN 1990.0 \n", + "29 Over Rev NaN 1997.0 \n", + "30 Over Top NaN 1996.0 \n", + "31 Ozma Wars NaN 1978.0 \n", + "\n", + " Manufacturer Genre(s) Max. Players \n", + "0 Sega Shooting gallery 2.0 \n", + "1 Midway Racing 1.0 \n", + "2 Midway Racing 1.0 \n", + "3 Atari Games Breakout 2.0 \n", + "4 Atlus Puzzle 2.0 \n", + "5 Sunsoft Puzzle 2.0 \n", + "6 Irem NaN 2.0 \n", + "7 Smilebit NaN NaN \n", + "8 UPL Scrolling shooter 2.0 \n", + "9 Midway Multidirectional shooter 1.0 \n", + "10 Playmark Shooting gallery 2.0 \n", + "11 Taito NaN 2.0 \n", + "12 Taito Shooting gallery 2.0 \n", + "13 Taito Shooting gallery 2.0 \n", + "14 Taito Shooting gallery 2.0 \n", + "15 Taito Shooting gallery 2.0 \n", + "16 Atari Multi-directional shooter 2.0 \n", + "17 Namco Scrolling shooter 2.0 \n", + "18 International Game System Beat 'em up 4.0 \n", + "19 Mitchell Corporation Platformer 1.0 \n", + "20 Konami Scrolling shooter NaN \n", + "21 Namco Shoot'em up/\\nFighting game 2.0 \n", + "22 Atari Shooter 1.0 \n", + "23 Sega AM2 Racing 1.0 \n", + "24 Toaplan Scrolling shooter 2.0 \n", + "25 Sega AM2 Racing 1.0 \n", + "26 Sega AM1 Racing 2.0 \n", + "27 Sega AM2 
First Person Shooter NaN \n", + "28 Konami Racing 1.0 \n", + "29 Jaleco Racing 2.0 \n", + "30 Alpha Denshi Racing 2.0 \n", + "31 SNK Fixed shooter 2.0 \n", + "\n", + "==================================================\n", + "\n", + "=== Таблица 10/20 (Индекс в train: 1679) ===\n", + "Оригинальная таблица: csv/204-csv/530.csv\n", + "Размер: 28 строк, 5 колонок\n", + "--------------------------------------------------\n", + "Оригинальная таблица:\n", + " Type Stage Cyclist Team \\\n", + "0 DNF 2 Rémi Pauriol Cofidis \n", + "1 DNS 3 José Ángel Gómez Marchante Cervélo TestTeam \n", + "2 DNS 3 Daniel Martin Garmin-Slipstream \n", + "3 DNS 3 Joost Posthuma Rabobank \n", + "4 DNF 3 Matteo Bono Lampre-NGC \n", + "5 DNF 3 Anthony Charteau Caisse d'Epargne \n", + "6 DNS 4 Sylvain Calzati Agritubel \n", + "7 DNS 4 Bradley Wiggins Garmin-Slipstream \n", + "8 DNF 4 Jussi Veikkanen Française des Jeux \n", + "9 DNS 5 Íñigo Cuesta Cervélo TestTeam \n", + "10 DNS 5 Enrico Franzoi Liquigas \n", + "11 DNF 5 Steven Cozza Garmin-Slipstream \n", + "12 DNF 5 Philippe Gilbert Silence-Lotto \n", + "13 DNF 5 Brian Vandborg Liquigas \n", + "14 DNS 6 Jelle Vanendert Silence-Lotto \n", + "15 DNF 6 Jose Luis Arrieta Ag2r-La Mondiale \n", + "16 DNF 6 Sébastien Chavanel Française des Jeux \n", + "17 DNF 6 Simon Spilak Lampre-NGC \n", + "18 DNF 6 Thomas Voeckler Bbox Bouygues Telecom \n", + "19 DNS 7 Sebastian Langeveld Rabobank \n", + "20 DNF 7 Samuel Sánchez Euskaltel-Euskadi \n", + "21 DNF 7 Mickael Cherel Française des Jeux \n", + "22 DNF 7 Thomas Fothen Team Milram \n", + "23 DNF 7 Marcel Sieberg Team Columbia-High Road \n", + "24 DNF 7 Tom Veelers Skil-Shimano \n", + "25 DNF 7 Mickael Buffaz Cofidis \n", + "26 DNF 7 Javier Aramendia Euskaltel-Euskadi \n", + "27 DNF 7 Romain Feillu Agritubel \n", + "\n", + " Reason \n", + "0 Broken collarbone \n", + "1 Broken arm sustained from crash in Stage 2 \n", + "2 Illness \n", + "3 Illness \n", + "4 NaN \n", + "5 NaN \n", + "6 NaN \n", + "7 Death in 
the family \n", + "8 NaN \n", + "9 NaN \n", + "10 NaN \n", + "11 NaN \n", + "12 NaN \n", + "13 NaN \n", + "14 NaN \n", + "15 NaN \n", + "16 NaN \n", + "17 NaN \n", + "18 Shoulder injury sustained from crash \n", + "19 NaN \n", + "20 NaN \n", + "21 NaN \n", + "22 NaN \n", + "23 NaN \n", + "24 NaN \n", + "25 NaN \n", + "26 NaN \n", + "27 NaN \n", + "Очищенная таблица:\n", + " Type Stage Cyclist Team \\\n", + "0 DNF 2 Rémi Pauriol Cofidis \n", + "1 DNS 3 José Ángel Gómez Marchante Cervélo TestTeam \n", + "2 DNS 3 Daniel Martin Garmin-Slipstream \n", + "3 DNS 3 Joost Posthuma Rabobank \n", + "4 DNF 3 Matteo Bono Lampre-NGC \n", + "5 DNF 3 Anthony Charteau Caisse d'Epargne \n", + "6 DNS 4 Sylvain Calzati Agritubel \n", + "7 DNS 4 Bradley Wiggins Garmin-Slipstream \n", + "8 DNF 4 Jussi Veikkanen Française des Jeux \n", + "9 DNS 5 Íñigo Cuesta Cervélo TestTeam \n", + "10 DNS 5 Enrico Franzoi Liquigas \n", + "11 DNF 5 Steven Cozza Garmin-Slipstream \n", + "12 DNF 5 Philippe Gilbert Silence-Lotto \n", + "13 DNF 5 Brian Vandborg Liquigas \n", + "14 DNS 6 Jelle Vanendert Silence-Lotto \n", + "15 DNF 6 Jose Luis Arrieta Ag2r-La Mondiale \n", + "16 DNF 6 Sébastien Chavanel Française des Jeux \n", + "17 DNF 6 Simon Spilak Lampre-NGC \n", + "18 DNF 6 Thomas Voeckler Bbox Bouygues Telecom \n", + "19 DNS 7 Sebastian Langeveld Rabobank \n", + "20 DNF 7 Samuel Sánchez Euskaltel-Euskadi \n", + "21 DNF 7 Mickael Cherel Française des Jeux \n", + "22 DNF 7 Thomas Fothen Team Milram \n", + "23 DNF 7 Marcel Sieberg Team Columbia-High Road \n", + "24 DNF 7 Tom Veelers Skil-Shimano \n", + "25 DNF 7 Mickael Buffaz Cofidis \n", + "26 DNF 7 Javier Aramendia Euskaltel-Euskadi \n", + "27 DNF 7 Romain Feillu Agritubel \n", + "\n", + " Reason \n", + "0 Broken collarbone \n", + "1 Broken arm sustained from crash in Stage 2 \n", + "2 Illness \n", + "3 Illness \n", + "4 NaN \n", + "5 NaN \n", + "6 NaN \n", + "7 Death in the family \n", + "8 NaN \n", + "9 NaN \n", + "10 NaN \n", + "11 NaN \n", + "12 
NaN \n", + "13 NaN \n", + "14 NaN \n", + "15 NaN \n", + "16 NaN \n", + "17 NaN \n", + "18 Shoulder injury sustained from crash \n", + "19 NaN \n", + "20 NaN \n", + "21 NaN \n", + "22 NaN \n", + "23 NaN \n", + "24 NaN \n", + "25 NaN \n", + "26 NaN \n", + "27 NaN \n", + "\n", + "==================================================\n", + "\n", + "=== Таблица 11/20 (Индекс в train: 11087) ===\n", + "Оригинальная таблица: csv/203-csv/608.csv\n", + "Размер: 11 строк, 6 колонок\n", + "--------------------------------------------------\n", + "Оригинальная таблица:\n", + " Rank Nation Gold Silver Bronze Total\n", + "0 1 Chinese Taipei (TPE) 2 0 0 2\n", + "1 1 Russia (RUS) 2 0 0 2\n", + "2 3 Great Britain (GBR) 1 0 0 1\n", + "3 4 Slovakia (SVK) 0 2 2 4\n", + "4 5 China (CHN) 0 2 1 3\n", + "5 6 Mexico (MEX) 0 1 1 2\n", + "6 7 Germany (GER) 0 0 2 2\n", + "7 8 South Korea (KOR) 0 0 1 1\n", + "8 8 Switzerland (SUI) 0 0 1 1\n", + "9 8 Thailand (THA) 0 0 1 1\n", + "10 8 Uzbekistan (UZB) 0 0 1 1\n", + "Очищенная таблица:\n", + " Rank Nation Gold Silver Bronze Total Nation_base \\\n", + "0 1 Chinese Taipei (TPE) 2 0 0 2 Chinese Taipei \n", + "1 1 Russia (RUS) 2 0 0 2 Russia \n", + "2 3 Great Britain (GBR) 1 0 0 1 Great Britain \n", + "3 4 Slovakia (SVK) 0 2 2 4 Slovakia \n", + "4 5 China (CHN) 0 2 1 3 China \n", + "5 6 Mexico (MEX) 0 1 1 2 Mexico \n", + "6 7 Germany (GER) 0 0 2 2 Germany \n", + "7 8 South Korea (KOR) 0 0 1 1 South Korea \n", + "8 8 Switzerland (SUI) 0 0 1 1 Switzerland \n", + "9 8 Thailand (THA) 0 0 1 1 Thailand \n", + "10 8 Uzbekistan (UZB) 0 0 1 1 Uzbekistan \n", + "\n", + " Nation_meta \n", + "0 TPE \n", + "1 RUS \n", + "2 GBR \n", + "3 SVK \n", + "4 CHN \n", + "5 MEX \n", + "6 GER \n", + "7 KOR \n", + "8 SUI \n", + "9 THA \n", + "10 UZB \n", + "\n", + "==================================================\n", + "\n", + "=== Таблица 12/20 (Индекс в train: 12135) ===\n", + "Оригинальная таблица: csv/203-csv/281.csv\n", + "Размер: 52 строк, 6 колонок\n", + 
"--------------------------------------------------\n", + "Оригинальная таблица:\n", + " Season Pitcher Decision Final\\nscore Opponent \\\n", + "0 1962 Bobby Shantz W 11–2 Chicago Cubs \n", + "1 1963 Turk Farrell L 2–9 San Francisco Giants \n", + "2 1964 Ken Johnson W 6–3 Cincinnati Reds \n", + "3 1965 Bob Bruce L 0–2 Philadelphia Phillies \n", + "4 1966 Robin Roberts L 2–3 Los Angeles Dodgers \n", + "5 1967 Mike Cuellar W 6–1 Atlanta Braves \n", + "6 1968 Larry Dierker W 5–4 Pittsburgh Pirates \n", + "7 1969 Don Wilson L 1–2 San Diego Padres \n", + "8 1970 Larry Dierker (2) W 8–5 San Francisco Giants \n", + "9 1971 Larry Dierker (3) W 5–2 Los Angeles Dodgers \n", + "10 1972 Don Wilson (2) L 0–5 San Francisco Giants \n", + "11 1973 Dave Roberts ND (W) 2–1 Atlanta Braves \n", + "12 1974 Dave Roberts (2) L 1–5 San Francisco Giants \n", + "13 1975 Larry Dierker (4) W 6–2 Atlanta Braves \n", + "14 1976 J.R. Richard L 5–11 Cincinnati Reds \n", + "15 1977 J.R. Richard (2) ND (W) 3–2 Atlanta Braves \n", + "16 1978 J.R. Richard (3) L 9–11 Cincinnati Reds \n", + "17 1979 J.R. Richard (4) W 2–1 Atlanta Braves \n", + "18 1980* J.R. Richard (5) W 3–2 Los Angeles Dodgers \n", + "19 1981* Joe Niekro L 0–2 Los Angeles Dodgers \n", + "20 1982 Nolan Ryan L 3–14 St. 
Louis Cardinals \n", + "21 1983 Joe Niekro (2) ND (L) 7–16 Los Angeles Dodgers \n", + "22 1984 Joe Niekro (3) L 2–4 Montreal Expos \n", + "23 1985 Nolan Ryan (2) W 2–1 Los Angeles Dodgers \n", + "24 1986* Nolan Ryan (3) L 3–8 San Francisco Giants \n", + "25 1987 Mike Scott W 4–3 Los Angeles Dodgers \n", + "26 1988 Mike Scott (2) W 6–3 San Diego Padres \n", + "27 1989 Mike Scott (3) W 10–3 Atlanta Braves \n", + "28 1990 Mike Scott (4) ND (L) 4–8 Cincinnati Reds \n", + "29 1991 Mike Scott (5) L 2–6 Cincinnati Reds \n", + "30 1992 Pete Harnisch L 0–2 Atlanta Braves \n", + "31 1993 Doug Drabek L 1–3 Philadelphia Phillies \n", + "32 1994 Pete Harnisch (2) ND (W) 6–5 Montreal Expos \n", + "33 1995 Doug Drabek (2) W 10–2 San Diego Padres \n", + "34 1996 Shane Reynolds L 3–4 Los Angeles Dodgers \n", + "35 1997* Shane Reynolds (2) W 2–1 Atlanta Braves \n", + "36 1998* Shane Reynolds (3) ND (L) 4–9 San Francisco Giants \n", + "37 1999* Shane Reynolds (4) W 4–2 Chicago Cubs \n", + "38 2000 Shane Reynolds (5) W 5–2 Pittsburgh Pirates \n", + "39 2001* Scott Elarton W 11–3 Milwaukee Brewers \n", + "40 2002 Wade Miller L 3–9 Milwaukee Brewers \n", + "41 2003 Roy Oswalt W 10–4 Colorado Rockies \n", + "42 2004* Roy Oswalt (2) ND (L) 4–5 San Francisco Giants \n", + "43 2005** Roy Oswalt (3) L 3–7 St. 
Louis Cardinals \n", + "44 2006 Roy Oswalt (4) W 1–0 Florida Marlins \n", + "45 2007 Roy Oswalt (5) ND (L) 2–4 Pittsburgh Pirates \n", + "46 2008 Roy Oswalt (6) L 0–4 San Diego Padres \n", + "47 2009 Roy Oswalt (7) L 2–4 Chicago Cubs \n", + "48 2010 Roy Oswalt (8) L 2–5 San Francisco Giants \n", + "49 2011 Brett Myers ND (L) 4–5 Philadelphia Phillies \n", + "50 2012 Wandy Rodriguez ND (L) 4–5 Colorado Rockies \n", + "51 2013 Bud Norris W 8–2 Texas Rangers \n", + "\n", + " Location \n", + "0 Colt Stadium \n", + "1 Colt Stadium \n", + "2 Crosley Field \n", + "3 Astrodome \n", + "4 Dodger Stadium \n", + "5 Astrodome \n", + "6 Astrodome \n", + "7 San Diego Stadium \n", + "8 Candlestick Park \n", + "9 Astrodome \n", + "10 Astrodome \n", + "11 Atlanta-Fulton County Stadium \n", + "12 Candlestick Park \n", + "13 Astrodome \n", + "14 Riverfront Stadium \n", + "15 Astrodome \n", + "16 Riverfront Stadium \n", + "17 Astrodome \n", + "18 Astrodome \n", + "19 Dodger Stadium \n", + "20 Astrodome \n", + "21 Astrodome \n", + "22 Astrodome \n", + "23 Astrodome \n", + "24 Astrodome \n", + "25 Astrodome \n", + "26 Astrodome \n", + "27 Astrodome \n", + "28 Astrodome \n", + "29 Riverfront Stadium \n", + "30 Astrodome \n", + "31 Astrodome \n", + "32 Astrodome \n", + "33 Jack Murphy Stadium \n", + "34 Astrodome \n", + "35 Astrodome \n", + "36 Astrodome \n", + "37 Astrodome \n", + "38 Three Rivers Stadium \n", + "39 Enron Field \n", + "40 Astros Field \n", + "41 Minute Maid Park \n", + "42 Minute Maid Park \n", + "43 Minute Maid Park \n", + "44 Minute Maid Park \n", + "45 Minute Maid Park \n", + "46 Petco Park \n", + "47 Minute Maid Park \n", + "48 Minute Maid Park \n", + "49 Citizens Bank Park \n", + "50 Minute Maid Park \n", + "51 Minute Maid Park \n", + "Очищенная таблица:\n", + " Season Pitcher Decision Final score Opponent \\\n", + "0 1962.0 Bobby Shantz W 11–2 Chicago Cubs \n", + "1 1963.0 Turk Farrell L 2–9 San Francisco Giants \n", + "2 1964.0 Ken Johnson W 6–3 Cincinnati Reds 
\n", + "3 1965.0 Bob Bruce L 0–2 Philadelphia Phillies \n", + "4 1966.0 Robin Roberts L 2–3 Los Angeles Dodgers \n", + "5 1967.0 Mike Cuellar W 6–1 Atlanta Braves \n", + "6 1968.0 Larry Dierker W 5–4 Pittsburgh Pirates \n", + "7 1969.0 Don Wilson L 1–2 San Diego Padres \n", + "8 1970.0 Larry Dierker (2) W 8–5 San Francisco Giants \n", + "9 1971.0 Larry Dierker (3) W 5–2 Los Angeles Dodgers \n", + "10 1972.0 Don Wilson (2) L 0–5 San Francisco Giants \n", + "11 1973.0 Dave Roberts ND (W) 2–1 Atlanta Braves \n", + "12 1974.0 Dave Roberts (2) L 1–5 San Francisco Giants \n", + "13 1975.0 Larry Dierker (4) W 6–2 Atlanta Braves \n", + "14 1976.0 J.R. Richard L 5–11 Cincinnati Reds \n", + "15 1977.0 J.R. Richard (2) ND (W) 3–2 Atlanta Braves \n", + "16 1978.0 J.R. Richard (3) L 9–11 Cincinnati Reds \n", + "17 1979.0 J.R. Richard (4) W 2–1 Atlanta Braves \n", + "18 NaN J.R. Richard (5) W 3–2 Los Angeles Dodgers \n", + "19 NaN Joe Niekro L 0–2 Los Angeles Dodgers \n", + "20 1982.0 Nolan Ryan L 3–14 St. 
Louis Cardinals \n", + "21 1983.0 Joe Niekro (2) ND (L) 7–16 Los Angeles Dodgers \n", + "22 1984.0 Joe Niekro (3) L 2–4 Montreal Expos \n", + "23 1985.0 Nolan Ryan (2) W 2–1 Los Angeles Dodgers \n", + "24 NaN Nolan Ryan (3) L 3–8 San Francisco Giants \n", + "25 1987.0 Mike Scott W 4–3 Los Angeles Dodgers \n", + "26 1988.0 Mike Scott (2) W 6–3 San Diego Padres \n", + "27 1989.0 Mike Scott (3) W 10–3 Atlanta Braves \n", + "28 1990.0 Mike Scott (4) ND (L) 4–8 Cincinnati Reds \n", + "29 1991.0 Mike Scott (5) L 2–6 Cincinnati Reds \n", + "30 1992.0 Pete Harnisch L 0–2 Atlanta Braves \n", + "31 1993.0 Doug Drabek L 1–3 Philadelphia Phillies \n", + "32 1994.0 Pete Harnisch (2) ND (W) 6–5 Montreal Expos \n", + "33 1995.0 Doug Drabek (2) W 10–2 San Diego Padres \n", + "34 1996.0 Shane Reynolds L 3–4 Los Angeles Dodgers \n", + "35 NaN Shane Reynolds (2) W 2–1 Atlanta Braves \n", + "36 NaN Shane Reynolds (3) ND (L) 4–9 San Francisco Giants \n", + "37 NaN Shane Reynolds (4) W 4–2 Chicago Cubs \n", + "38 2000.0 Shane Reynolds (5) W 5–2 Pittsburgh Pirates \n", + "39 NaN Scott Elarton W 11–3 Milwaukee Brewers \n", + "40 2002.0 Wade Miller L 3–9 Milwaukee Brewers \n", + "41 2003.0 Roy Oswalt W 10–4 Colorado Rockies \n", + "42 NaN Roy Oswalt (2) ND (L) 4–5 San Francisco Giants \n", + "43 NaN Roy Oswalt (3) L 3–7 St. 
Louis Cardinals \n", + "44 2006.0 Roy Oswalt (4) W 1–0 Florida Marlins \n", + "45 2007.0 Roy Oswalt (5) ND (L) 2–4 Pittsburgh Pirates \n", + "46 2008.0 Roy Oswalt (6) L 0–4 San Diego Padres \n", + "47 2009.0 Roy Oswalt (7) L 2–4 Chicago Cubs \n", + "48 2010.0 Roy Oswalt (8) L 2–5 San Francisco Giants \n", + "49 2011.0 Brett Myers ND (L) 4–5 Philadelphia Phillies \n", + "50 2012.0 Wandy Rodriguez ND (L) 4–5 Colorado Rockies \n", + "51 2013.0 Bud Norris W 8–2 Texas Rangers \n", + "\n", + " Location Pitcher_base Pitcher_meta \\\n", + "0 Colt Stadium NaN NaN \n", + "1 Colt Stadium NaN NaN \n", + "2 Crosley Field NaN NaN \n", + "3 Astrodome NaN NaN \n", + "4 Dodger Stadium NaN NaN \n", + "5 Astrodome NaN NaN \n", + "6 Astrodome NaN NaN \n", + "7 San Diego Stadium NaN NaN \n", + "8 Candlestick Park Larry Dierker 2 \n", + "9 Astrodome Larry Dierker 3 \n", + "10 Astrodome Don Wilson 2 \n", + "11 Atlanta-Fulton County Stadium NaN NaN \n", + "12 Candlestick Park Dave Roberts 2 \n", + "13 Astrodome Larry Dierker 4 \n", + "14 Riverfront Stadium NaN NaN \n", + "15 Astrodome J.R. Richard 2 \n", + "16 Riverfront Stadium J.R. Richard 3 \n", + "17 Astrodome J.R. Richard 4 \n", + "18 Astrodome J.R. 
Richard 5 \n", + "19 Dodger Stadium NaN NaN \n", + "20 Astrodome NaN NaN \n", + "21 Astrodome Joe Niekro 2 \n", + "22 Astrodome Joe Niekro 3 \n", + "23 Astrodome Nolan Ryan 2 \n", + "24 Astrodome Nolan Ryan 3 \n", + "25 Astrodome NaN NaN \n", + "26 Astrodome Mike Scott 2 \n", + "27 Astrodome Mike Scott 3 \n", + "28 Astrodome Mike Scott 4 \n", + "29 Riverfront Stadium Mike Scott 5 \n", + "30 Astrodome NaN NaN \n", + "31 Astrodome NaN NaN \n", + "32 Astrodome Pete Harnisch 2 \n", + "33 Jack Murphy Stadium Doug Drabek 2 \n", + "34 Astrodome NaN NaN \n", + "35 Astrodome Shane Reynolds 2 \n", + "36 Astrodome Shane Reynolds 3 \n", + "37 Astrodome Shane Reynolds 4 \n", + "38 Three Rivers Stadium Shane Reynolds 5 \n", + "39 Enron Field NaN NaN \n", + "40 Astros Field NaN NaN \n", + "41 Minute Maid Park NaN NaN \n", + "42 Minute Maid Park Roy Oswalt 2 \n", + "43 Minute Maid Park Roy Oswalt 3 \n", + "44 Minute Maid Park Roy Oswalt 4 \n", + "45 Minute Maid Park Roy Oswalt 5 \n", + "46 Petco Park Roy Oswalt 6 \n", + "47 Minute Maid Park Roy Oswalt 7 \n", + "48 Minute Maid Park Roy Oswalt 8 \n", + "49 Citizens Bank Park NaN NaN \n", + "50 Minute Maid Park NaN NaN \n", + "51 Minute Maid Park NaN NaN \n", + "\n", + " Final score_score1 Final score_score2 \n", + "0 11 2 \n", + "1 2 9 \n", + "2 6 3 \n", + "3 0 2 \n", + "4 2 3 \n", + "5 6 1 \n", + "6 5 4 \n", + "7 1 2 \n", + "8 8 5 \n", + "9 5 2 \n", + "10 0 5 \n", + "11 2 1 \n", + "12 1 5 \n", + "13 6 2 \n", + "14 5 11 \n", + "15 3 2 \n", + "16 9 11 \n", + "17 2 1 \n", + "18 3 2 \n", + "19 0 2 \n", + "20 3 14 \n", + "21 7 16 \n", + "22 2 4 \n", + "23 2 1 \n", + "24 3 8 \n", + "25 4 3 \n", + "26 6 3 \n", + "27 10 3 \n", + "28 4 8 \n", + "29 2 6 \n", + "30 0 2 \n", + "31 1 3 \n", + "32 6 5 \n", + "33 10 2 \n", + "34 3 4 \n", + "35 2 1 \n", + "36 4 9 \n", + "37 4 2 \n", + "38 5 2 \n", + "39 11 3 \n", + "40 3 9 \n", + "41 10 4 \n", + "42 4 5 \n", + "43 3 7 \n", + "44 1 0 \n", + "45 2 4 \n", + "46 0 4 \n", + "47 2 4 \n", + "48 2 5 \n", 
+ "49 4 5 \n", + "50 4 5 \n", + "51 8 2 \n", + "\n", + "==================================================\n", + "\n", + "=== Таблица 13/20 (Индекс в train: 8935) ===\n", + "Оригинальная таблица: csv/203-csv/268.csv\n", + "Размер: 16 строк, 5 колонок\n", + "--------------------------------------------------\n", + "Оригинальная таблица:\n", + " Date Name Nationality Tonnage\\n(GRT) \\\n", + "0 7 October 1941 Svend Foyn United Kingdom 14,795 \n", + "1 16 February 1942 Monagas Venezuela 2,650 \n", + "2 16 February 1942 San Nicholas United Kingdom 2,391 \n", + "3 16 February 1942 Tia Juana United Kingdom 2,395 \n", + "4 22 February 1942 J.N.Pew United States 9,033 \n", + "5 23 February 1942 Sun United States 9,002 \n", + "6 23 February 1942 Thalia Panama 8,329 \n", + "7 11 May 1942 Cape of Good Hope United Kingdom 4,963 \n", + "8 24 May 1942 Gonçalves Dias Brazil 4,996 \n", + "9 28 May 1942 Alcoa Pilgrim United States 6,759 \n", + "10 3 June 1942 M.F. Ellliot United States 6,940 \n", + "11 9 June 1942 Bruxelles Belgium 5,085 \n", + "12 9 June 1942 Franklin K. 
Lane United States 6,589 \n", + "13 15 June 1942 Cold Harbor Panama 5,010 \n", + "14 15 June 1942 Scottsburg United States 8,010 \n", + "15 15 June 1942 West Hardaway United States 5,702 \n", + "\n", + " Fate \n", + "0 Damaged \n", + "1 Sunk \n", + "2 Sunk \n", + "3 Sunk \n", + "4 Sunk \n", + "5 Damaged \n", + "6 Sunk \n", + "7 Sunk \n", + "8 Sunk \n", + "9 Sunk \n", + "10 Sunk \n", + "11 Sunk \n", + "12 Sunk \n", + "13 Sunk \n", + "14 Sunk \n", + "15 Sunk \n", + "Очищенная таблица:\n", + " Date Name Nationality Tonnage (GRT) \\\n", + "0 7 October 1941 Svend Foyn United Kingdom 14795 \n", + "1 16 February 1942 Monagas Venezuela 2650 \n", + "2 16 February 1942 San Nicholas United Kingdom 2391 \n", + "3 16 February 1942 Tia Juana United Kingdom 2395 \n", + "4 22 February 1942 J.N.Pew United States 9033 \n", + "5 23 February 1942 Sun United States 9002 \n", + "6 23 February 1942 Thalia Panama 8329 \n", + "7 11 May 1942 Cape of Good Hope United Kingdom 4963 \n", + "8 24 May 1942 Gonçalves Dias Brazil 4996 \n", + "9 28 May 1942 Alcoa Pilgrim United States 6759 \n", + "10 3 June 1942 M.F. Ellliot United States 6940 \n", + "11 9 June 1942 Bruxelles Belgium 5085 \n", + "12 9 June 1942 Franklin K. 
Lane United States 6589 \n", + "13 15 June 1942 Cold Harbor Panama 5010 \n", + "14 15 June 1942 Scottsburg United States 8010 \n", + "15 15 June 1942 West Hardaway United States 5702 \n", + "\n", + " Fate Date_datetime \n", + "0 Damaged 1941-10-07 \n", + "1 Sunk 1942-02-16 \n", + "2 Sunk 1942-02-16 \n", + "3 Sunk 1942-02-16 \n", + "4 Sunk 1942-02-22 \n", + "5 Damaged 1942-02-23 \n", + "6 Sunk 1942-02-23 \n", + "7 Sunk 1942-05-11 \n", + "8 Sunk 1942-05-24 \n", + "9 Sunk 1942-05-28 \n", + "10 Sunk 1942-06-03 \n", + "11 Sunk 1942-06-09 \n", + "12 Sunk 1942-06-09 \n", + "13 Sunk 1942-06-15 \n", + "14 Sunk 1942-06-15 \n", + "15 Sunk 1942-06-15 \n", + "\n", + "==================================================\n", "\n" ] - } - ], - "source": [ - "for i in range(n):\n", - " # Очищаем код для текущей итерации\n", - " current_code = parse_panda_code(code[i])\n", - " \n", - " #print(f\"Итерация {i} | Запускаем выражение: {current_code}\")\n", - " \n", - " try:\n", - " # Читаем нужный датафрейм\n", - " df = pd.read_csv('data/' + train.context.iloc[i])\n", - " target = train.iloc[i].targetValue\n", - " \n", - " # Передаем в eval() код ИМЕННО для текущей итерации\n", - " result = eval(current_code)\n", - " \n", - " # print(\"--- Результат ---\")\n", - " # print(result)\n", - " # print(\"--- Правильный ответ ---\")\n", - " # print(target)\n", - " # print(\"-\" * 40)\n", - " if result == target:\n", - " print(current_code)\n", - " \n", - " except Exception as e:\n", - " print()\n", - " # Выводим реальный текст ошибки, чтобы понять, в чем проблема\n", - " #print(f\"Ошибка на итерации {i}: {e}\")\n", - " #print(\"-\" * 40)" - ] - }, - { - "cell_type": "code", - "execution_count": 61, - "id": "34a693df-cd37-4075-869b-1e1b837da533", - "metadata": {}, - "outputs": [], - "source": [ - "df = pd.read_csv('data/'+ train.context.iloc[0])" - ] - }, - { - "cell_type": "code", - "execution_count": 75, - "id": "4d229f11-3a1f-4b98-a860-b2dc58c623e7", - "metadata": {}, - "outputs": [ + }, { 
"name": "stderr", "output_type": "stream", "text": [ - "WARNING: Ignoring invalid distribution ~atplotlib (C:\\Users\\PC\\anaconda3\\Lib\\site-packages)\n" + "C:\\Users\\PC\\MyProjects\\Semtab\\WTQ\\Qwen\\normalize.py:112: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n", + " clean_date_attempt = df[col].str.replace(r'\\*$', '', regex=True)\n", + "C:\\Users\\PC\\MyProjects\\Semtab\\WTQ\\Qwen\\normalize.py:112: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n", + " clean_date_attempt = df[col].str.replace(r'\\*$', '', regex=True)\n" ] - } - ], + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=== Таблица 14/20 (Индекс в train: 1424) ===\n", + "Оригинальная таблица: csv/204-csv/539.csv\n", + "Размер: 128 строк, 5 колонок\n", + "--------------------------------------------------\n", + "Оригинальная таблица:\n", + " Date Name Moving from \\\n", + "0 3 March 2009 Silvestre Varela Estrela da Amadora \n", + "1 14 April 2009 Mario Rondón Pontassolense \n", + "2 18 April 2009 Patric São Caetano \n", + "3 23 April 2009 Orlando Sá Braga \n", + "4 8 May 2009 Kamani Hill Wolfsburg \n", + ".. ... ... ... \n", + "123 31 August 2009 Milan Purović Sporting CP \n", + "124 31 August 2009 Nuno Frechaut Braga \n", + "125 31 August 2009 Ivo Pinto Porto \n", + "126 31 August 2009 Patric Benfica \n", + "127 31 August 2009 Tiago Pinto Sporting CP \n", + "\n", + " Moving to Fee \n", + "0 Porto Free \n", + "1 Nacional Free \n", + "2 Benfica Undisclosed Fee \n", + "3 Porto Undisclosed Fee \n", + "4 Vitória de Guimarães Free \n", + ".. ... ... 
\n", + "123 Videoton Loan \n", + "124 Metz Free \n", + "125 Vitória de Setúbal Loan \n", + "126 Cruzeiro Loan \n", + "127 Braga Free \n", + "\n", + "[128 rows x 5 columns]\n", + "Очищенная таблица:\n", + " Date Name Moving from \\\n", + "0 3 March 2009 Silvestre Varela Estrela da Amadora \n", + "1 14 April 2009 Mario Rondón Pontassolense \n", + "2 18 April 2009 Patric São Caetano \n", + "3 23 April 2009 Orlando Sá Braga \n", + "4 8 May 2009 Kamani Hill Wolfsburg \n", + ".. ... ... ... \n", + "123 31 August 2009 Milan Purović Sporting CP \n", + "124 31 August 2009 Nuno Frechaut Braga \n", + "125 31 August 2009 Ivo Pinto Porto \n", + "126 31 August 2009 Patric Benfica \n", + "127 31 August 2009 Tiago Pinto Sporting CP \n", + "\n", + " Moving to Fee Date_datetime \n", + "0 Porto Free 2009-03-03 \n", + "1 Nacional Free 2009-04-14 \n", + "2 Benfica Undisclosed Fee 2009-04-18 \n", + "3 Porto Undisclosed Fee 2009-04-23 \n", + "4 Vitória de Guimarães Free 2009-05-08 \n", + ".. ... ... ... \n", + "123 Videoton Loan 2009-08-31 \n", + "124 Metz Free 2009-08-31 \n", + "125 Vitória de Setúbal Loan 2009-08-31 \n", + "126 Cruzeiro Loan 2009-08-31 \n", + "127 Braga Free 2009-08-31 \n", + "\n", + "[128 rows x 6 columns]\n", + "\n", + "==================================================\n", + "\n", + "=== Таблица 15/20 (Индекс в train: 9674) ===\n", + "Оригинальная таблица: csv/203-csv/271.csv\n", + "Размер: 12 строк, 8 колонок\n", + "--------------------------------------------------\n", + "Оригинальная таблица:\n", + " Date Series Circuit \\\n", + "0 23 Feb ATCC Round 1 Amaroo Park \n", + "1 8 Mar ATCC Round 2 Sandown International Raceway \n", + "2 15 Mar ATCC Round 3 Symmons Plains Raceway \n", + "3 5 Apr ATCC Round 4 Winton Motor Raceway \n", + "4 3 May ATCC Round 5 Lakeside International Raceway \n", + "5 24 May ATCC Round 6 Eastern Creek Raceway \n", + "6 31 May ATCC Round 7 Mallala Motor Sport Park \n", + "7 7 Jun ATCC Round 8 Barbagallo Raceway \n", + "8 21 Jun ATCC Round 9 
Oran Park Raceway \n", + "9 13 Sep Drink/Drive Sandown 500 Sandown International Raceway \n", + "10 4 Oct Tooheys 1000 Mount Panorama Circuit \n", + "11 8 Nov Clarke Shoes Group A Finale Adelaide Street Circuit \n", + "\n", + " City / State Winner \\\n", + "0 Sydney, New South Wales Mark Skaife \n", + "1 Melbourne, Victoria John Bowe \n", + "2 Launceston, Tasmania Glenn Seton \n", + "3 Benalla, Victoria Mark Skaife \n", + "4 Brisbane, Queensland Tony Longhurst \n", + "5 Sydney, New South Wales John Bowe \n", + "6 Mallala, South Australia Mark Skaife \n", + "7 Perth, Western Australia John Bowe \n", + "8 Sydney, New South Wales Mark Skaife \n", + "9 Melbourne, Victoria Larry Perkins\\nSteve Harrington \n", + "10 Bathurst, New South Wales Mark Skaife\\nJim Richards \n", + "11 Adelaide, South Australia Jim Richards \n", + "\n", + " Team Car Report \n", + "0 Winfield Team Nissan Nissan Skyline R32 GT-R NaN \n", + "1 Shell Ultra-High Racing Ford Sierra RS500 NaN \n", + "2 Peter Jackson Racing Ford Sierra RS500 NaN \n", + "3 Winfield Team Nissan Nissan Skyline R32 GT-R NaN \n", + "4 Benson & Hedges Racing BMW M3 Evolution NaN \n", + "5 Shell Ultra-High Racing Ford Sierra RS500 NaN \n", + "6 Winfield Team Nissan Nissan Skyline R32 GT-R NaN \n", + "7 Shell Ultra-High Racing Ford Sierra RS500 NaN \n", + "8 Winfield Team Nissan Nissan Skyline R32 GT-R NaN \n", + "9 Bob Jane T-Marts Racing Holden VL Commodore SS Group A SV report \n", + "10 Winfield Team Nissan Nissan Skyline R32 GT-R report \n", + "11 Winfield Team Nissan Nissan Skyline R32 GT-R NaN \n", + "Очищенная таблица:\n", + " Date Series Circuit \\\n", + "0 23 Feb ATCC Round 1 Amaroo Park \n", + "1 8 Mar ATCC Round 2 Sandown International Raceway \n", + "2 15 Mar ATCC Round 3 Symmons Plains Raceway \n", + "3 5 Apr ATCC Round 4 Winton Motor Raceway \n", + "4 3 May ATCC Round 5 Lakeside International Raceway \n", + "5 24 May ATCC Round 6 Eastern Creek Raceway \n", + "6 31 May ATCC Round 7 Mallala Motor Sport Park \n", 
+ "7 7 Jun ATCC Round 8 Barbagallo Raceway \n", + "8 21 Jun ATCC Round 9 Oran Park Raceway \n", + "9 13 Sep Drink/Drive Sandown 500 Sandown International Raceway \n", + "10 4 Oct Tooheys 1000 Mount Panorama Circuit \n", + "11 8 Nov Clarke Shoes Group A Finale Adelaide Street Circuit \n", + "\n", + " City / State Winner \\\n", + "0 Sydney, New South Wales Mark Skaife \n", + "1 Melbourne, Victoria John Bowe \n", + "2 Launceston, Tasmania Glenn Seton \n", + "3 Benalla, Victoria Mark Skaife \n", + "4 Brisbane, Queensland Tony Longhurst \n", + "5 Sydney, New South Wales John Bowe \n", + "6 Mallala, South Australia Mark Skaife \n", + "7 Perth, Western Australia John Bowe \n", + "8 Sydney, New South Wales Mark Skaife \n", + "9 Melbourne, Victoria Larry Perkins\\nSteve Harrington \n", + "10 Bathurst, New South Wales Mark Skaife\\nJim Richards \n", + "11 Adelaide, South Australia Jim Richards \n", + "\n", + " Team Car Report \\\n", + "0 Winfield Team Nissan Nissan Skyline R32 GT-R NaN \n", + "1 Shell Ultra-High Racing Ford Sierra RS500 NaN \n", + "2 Peter Jackson Racing Ford Sierra RS500 NaN \n", + "3 Winfield Team Nissan Nissan Skyline R32 GT-R NaN \n", + "4 Benson & Hedges Racing BMW M3 Evolution NaN \n", + "5 Shell Ultra-High Racing Ford Sierra RS500 NaN \n", + "6 Winfield Team Nissan Nissan Skyline R32 GT-R NaN \n", + "7 Shell Ultra-High Racing Ford Sierra RS500 NaN \n", + "8 Winfield Team Nissan Nissan Skyline R32 GT-R NaN \n", + "9 Bob Jane T-Marts Racing Holden VL Commodore SS Group A SV report \n", + "10 Winfield Team Nissan Nissan Skyline R32 GT-R report \n", + "11 Winfield Team Nissan Nissan Skyline R32 GT-R NaN \n", + "\n", + " Date_datetime \n", + "0 1-02-23 \n", + "1 1-03-08 \n", + "2 1-03-15 \n", + "3 1-04-05 \n", + "4 1-05-03 \n", + "5 1-05-24 \n", + "6 1-05-31 \n", + "7 1-06-07 \n", + "8 1-06-21 \n", + "9 1-09-13 \n", + "10 1-10-04 \n", + "11 1-11-08 \n", + "\n", + "==================================================\n", + "\n", + "=== Таблица 16/20 (Индекс в 
train: 6912) ===\n", + "Оригинальная таблица: csv/204-csv/443.csv\n", + "Размер: 17 строк, 6 колонок\n", + "--------------------------------------------------\n", + "Оригинальная таблица:\n", + " Week Date Opponent Result \\\n", + "0 1 September 3, 1995 at Miami Dolphins L 52–14 \n", + "1 2 September 10, 1995 Indianapolis Colts L 27–24 (OT) \n", + "2 3 September 17, 1995 Jacksonville Jaguars W 27–10 \n", + "3 4 September 24, 1995 at Atlanta Falcons L 13–3 \n", + "4 5 October 1, 1995 Oakland Raiders L 47–10 \n", + "5 6 October 8, 1995 at Buffalo Bills L 29–10 \n", + "6 7 October 15, 1995 at Carolina Panthers L 26–15 \n", + "7 8 October 22, 1995 Miami Dolphins W 17–16 \n", + "8 9 October 29, 1995 at Indianapolis Colts L 17–10 \n", + "9 10 November 5, 1995 New England Patriots L 20–7 \n", + "10 11 Bye Bye Bye \n", + "11 12 November 19, 1995 Buffalo Bills L 28–26 \n", + "12 13 November 26, 1995 at Seattle Seahawks W 16–10 \n", + "13 14 December 3, 1995 St. Louis Rams L 23–20 \n", + "14 15 December 10, 1995 at New England Patriots L 31–28 \n", + "15 16 December 17, 1995 at Houston Oilers L 23–6 \n", + "16 17 December 24, 1995 New Orleans Saints L 12–0 \n", + "\n", + " Game site Attendance \n", + "0 Joe Robbie Stadium 71,317 \n", + "1 The Meadowlands 65,134 \n", + "2 The Meadowlands 49,970 \n", + "3 Georgia Dome 40,778 \n", + "4 The Meadowlands 68,941 \n", + "5 Rich Stadium 79,485 \n", + "6 Memorial Stadium 52,613 \n", + "7 The Meadowlands 67,228 \n", + "8 RCA Dome 49,250 \n", + "9 The Meadowlands 61,462 \n", + "10 Bye Bye \n", + "11 The Meadowlands 54,436 \n", + "12 Kingdome 41,160 \n", + "13 The Meadowlands 52,023 \n", + "14 Foxboro Stadium 46,617 \n", + "15 Astrodome 35,873 \n", + "16 The Meadowlands 28,885 \n", + "Очищенная таблица:\n", + " Week Date Opponent Result \\\n", + "0 1 September 3, 1995 at Miami Dolphins L 52–14 \n", + "1 2 September 10, 1995 Indianapolis Colts L 27–24 (OT) \n", + "2 3 September 17, 1995 Jacksonville Jaguars W 27–10 \n", + "3 4 September 
24, 1995 at Atlanta Falcons L 13–3 \n", + "4 5 October 1, 1995 Oakland Raiders L 47–10 \n", + "5 6 October 8, 1995 at Buffalo Bills L 29–10 \n", + "6 7 October 15, 1995 at Carolina Panthers L 26–15 \n", + "7 8 October 22, 1995 Miami Dolphins W 17–16 \n", + "8 9 October 29, 1995 at Indianapolis Colts L 17–10 \n", + "9 10 November 5, 1995 New England Patriots L 20–7 \n", + "10 11 Bye Bye Bye \n", + "11 12 November 19, 1995 Buffalo Bills L 28–26 \n", + "12 13 November 26, 1995 at Seattle Seahawks W 16–10 \n", + "13 14 December 3, 1995 St. Louis Rams L 23–20 \n", + "14 15 December 10, 1995 at New England Patriots L 31–28 \n", + "15 16 December 17, 1995 at Houston Oilers L 23–6 \n", + "16 17 December 24, 1995 New Orleans Saints L 12–0 \n", + "\n", + " Game site Attendance Date_datetime Result_score1 Result_score2 \n", + "0 Joe Robbie Stadium 71317.0 1995-09-03 52.0 14.0 \n", + "1 The Meadowlands 65134.0 1995-09-10 27.0 24.0 \n", + "2 The Meadowlands 49970.0 1995-09-17 27.0 10.0 \n", + "3 Georgia Dome 40778.0 1995-09-24 13.0 3.0 \n", + "4 The Meadowlands 68941.0 1995-10-01 47.0 10.0 \n", + "5 Rich Stadium 79485.0 1995-10-08 29.0 10.0 \n", + "6 Memorial Stadium 52613.0 1995-10-15 26.0 15.0 \n", + "7 The Meadowlands 67228.0 1995-10-22 17.0 16.0 \n", + "8 RCA Dome 49250.0 1995-10-29 17.0 10.0 \n", + "9 The Meadowlands 61462.0 1995-11-05 20.0 7.0 \n", + "10 Bye NaN NaT NaN NaN \n", + "11 The Meadowlands 54436.0 1995-11-19 28.0 26.0 \n", + "12 Kingdome 41160.0 1995-11-26 16.0 10.0 \n", + "13 The Meadowlands 52023.0 1995-12-03 23.0 20.0 \n", + "14 Foxboro Stadium 46617.0 1995-12-10 31.0 28.0 \n", + "15 Astrodome 35873.0 1995-12-17 23.0 6.0 \n", + "16 The Meadowlands 28885.0 1995-12-24 12.0 0.0 \n", + "\n", + "==================================================\n", + "\n", + "=== Таблица 17/20 (Индекс в train: 520) ===\n", + "Оригинальная таблица: csv/204-csv/517.csv\n", + "Размер: 34 строк, 6 колонок\n", + "--------------------------------------------------\n", + "Оригинальная 
таблица:\n", + " Name Date of birth Club \\\n", + "0 Francis Bosschaerts 15 October 1956 Heist \n", + "1 Peter Maes 1 June 1964 Lokeren \n", + "2 Hein Vanhaezebrouck 16 February 1964 Kortrijk \n", + "3 Frederik Vanderbiest 10 October 1977 Oostende \n", + "4 Arnauld Mercier 4 June 1972 Boussu Dour \n", + "5 Frank Defays 23 January 1974 Virton \n", + "6 Serhiy Serebrennikov 1 September 1976 Roeselare \n", + "7 Regi Van Acker 25 April 1955 Hoogstraten \n", + "8 Francky Dury 11 October 1957 Zulte Waregem \n", + "9 Dante Brogno 2 May 1966 Tubize \n", + "10 Eric Franken NaN ASV Geel \n", + "11 John van den Brom 4 October 1966 Anderlecht \n", + "12 Tintín Márquez 7 January 1962 Eupen \n", + "13 Lorenzo Staelens 30 April 1964 Cercle Brugge \n", + "14 Dennis van Wijk 16 December 1962 Westerlo \n", + "15 Stanley Menzo 15 October 1963 Lierse \n", + "16 Yannick Ferrera 24 September 1980 Sint-Truiden \n", + "17 Guy Luzon 7 August 1975 Standard Liège \n", + "18 Jimmy Floyd Hasselbaink 27 March 1972 Antwerp \n", + "19 Philippe Médery NaN Visé \n", + "20 Felice Mazzu 12 March 1966 Charleroi \n", + "21 Stijn Vreven 18 July 1973 Lommel United \n", + "22 Michel Preud'homme 24 January 1959 Club Brugge \n", + "23 Lionel Bah 2 February 1980 WS Brussels \n", + "24 Guido Brepoels 7 June 1961 Dessel Sport \n", + "25 Čedomir Janevski 3 July 1961 Mons \n", + "26 Mircea Rednic 9 April 1962 Gent \n", + "27 Bob Peeters 10 January 1974 Waasland-Beveren \n", + "28 Rachid Chihab NaN Mouscron-Péruwelz \n", + "29 Franky Vercauteren 28 October 1956 Mechelen \n", + "30 Jean-Guy Wallemme 10 August 1967 RWDM Brussels \n", + "31 René Desaeyere 14 September 1947 Aalst \n", + "32 Emilio Ferrera 19 June 1967 Genk \n", + "33 Ivan Leko 7 February 1978 OH Leuven \n", + "\n", + " Division Appointed Time as manager \n", + "0 Belgian Second Division 1 June 1999 15 years, 25 days \n", + "1 Belgian Pro League 20 May 2010 4 years, 37 days \n", + "2 Belgian Pro League 6 June 2010 4 years, 20 days \n", + "3 Belgian 
Pro League 15 February 2011 3 years, 131 days \n", + "4 Belgian Second Division 21 April 2011 3 years, 66 days \n", + "5 Belgian Second Division 6 June 2011 3 years, 20 days \n", + "6 Belgian Second Division 30 June 2011 2 years, 361 days \n", + "7 Belgian Second Division 23 November 2011 2 years, 215 days \n", + "8 Belgian Pro League 30 December 2011 2 years, 178 days \n", + "9 Belgian Second Division 26 February 2012 2 years, 120 days \n", + "10 Belgian Second Division 20 March 2012 2 years, 98 days \n", + "11 Belgian Pro League 30 May 2012 2 years, 27 days \n", + "12 Belgian Second Division 6 July 2012 1 year, 355 days \n", + "13 Belgian Pro League 2 April 2013 1 year, 85 days \n", + "14 Belgian Second Division 29 April 2013 1 year, 58 days \n", + "15 Belgian Pro League 14 May 2013 1 year, 43 days \n", + "16 Belgian Second Division 24 May 2013 1 year, 33 days \n", + "17 Belgian Pro League 27 May 2013 1 year, 30 days \n", + "18 Belgian Second Division 29 May 2013 1 year, 28 days \n", + "19 Belgian Second Division 31 May 2013 1 year, 26 days \n", + "20 Belgian Pro League 1 June 2013 1 year, 25 days \n", + "21 Belgian Second Division 1 June 2013 1 year, 25 days \n", + "22 Belgian Pro League 21 September 2013 0 years, 278 days \n", + "23 Belgian Second Division 21 September 2013 0 years, 278 days \n", + "24 Belgian Second Division 24 September 2013 0 years, 275 days \n", + "25 Belgian Pro League 27 September 2013 0 years, 272 days \n", + "26 Belgian Pro League 1 October 2013 0 years, 268 days \n", + "27 Belgian Pro League 5 November 2013 0 years, 233 days \n", + "28 Belgian Second Division 19 December 2013 0 years, 189 days \n", + "29 Belgian Pro League 5 January 2014 0 years, 172 days \n", + "30 Belgian Second Division 30 January 2014 0 years, 147 days \n", + "31 Belgian Second Division 5 February 2014 0 years, 141 days \n", + "32 Belgian Pro League 24 February 2014 0 years, 122 days \n", + "33 Belgian Pro League 25 February 2014 0 years, 121 days \n", + "Очищенная 
таблица:\n", + " Name Date of birth Club \\\n", + "0 Francis Bosschaerts 15 October 1956 Heist \n", + "1 Peter Maes 1 June 1964 Lokeren \n", + "2 Hein Vanhaezebrouck 16 February 1964 Kortrijk \n", + "3 Frederik Vanderbiest 10 October 1977 Oostende \n", + "4 Arnauld Mercier 4 June 1972 Boussu Dour \n", + "5 Frank Defays 23 January 1974 Virton \n", + "6 Serhiy Serebrennikov 1 September 1976 Roeselare \n", + "7 Regi Van Acker 25 April 1955 Hoogstraten \n", + "8 Francky Dury 11 October 1957 Zulte Waregem \n", + "9 Dante Brogno 2 May 1966 Tubize \n", + "10 Eric Franken NaN ASV Geel \n", + "11 John van den Brom 4 October 1966 Anderlecht \n", + "12 Tintín Márquez 7 January 1962 Eupen \n", + "13 Lorenzo Staelens 30 April 1964 Cercle Brugge \n", + "14 Dennis van Wijk 16 December 1962 Westerlo \n", + "15 Stanley Menzo 15 October 1963 Lierse \n", + "16 Yannick Ferrera 24 September 1980 Sint-Truiden \n", + "17 Guy Luzon 7 August 1975 Standard Liège \n", + "18 Jimmy Floyd Hasselbaink 27 March 1972 Antwerp \n", + "19 Philippe Médery NaN Visé \n", + "20 Felice Mazzu 12 March 1966 Charleroi \n", + "21 Stijn Vreven 18 July 1973 Lommel United \n", + "22 Michel Preud'homme 24 January 1959 Club Brugge \n", + "23 Lionel Bah 2 February 1980 WS Brussels \n", + "24 Guido Brepoels 7 June 1961 Dessel Sport \n", + "25 Čedomir Janevski 3 July 1961 Mons \n", + "26 Mircea Rednic 9 April 1962 Gent \n", + "27 Bob Peeters 10 January 1974 Waasland-Beveren \n", + "28 Rachid Chihab NaN Mouscron-Péruwelz \n", + "29 Franky Vercauteren 28 October 1956 Mechelen \n", + "30 Jean-Guy Wallemme 10 August 1967 RWDM Brussels \n", + "31 René Desaeyere 14 September 1947 Aalst \n", + "32 Emilio Ferrera 19 June 1967 Genk \n", + "33 Ivan Leko 7 February 1978 OH Leuven \n", + "\n", + " Division Appointed Time as manager \\\n", + "0 Belgian Second Division 1 June 1999 15 years, 25 days \n", + "1 Belgian Pro League 20 May 2010 4 years, 37 days \n", + "2 Belgian Pro League 6 June 2010 4 years, 20 days \n", + "3 Belgian 
Pro League 15 February 2011 3 years, 131 days \n", + "4 Belgian Second Division 21 April 2011 3 years, 66 days \n", + "5 Belgian Second Division 6 June 2011 3 years, 20 days \n", + "6 Belgian Second Division 30 June 2011 2 years, 361 days \n", + "7 Belgian Second Division 23 November 2011 2 years, 215 days \n", + "8 Belgian Pro League 30 December 2011 2 years, 178 days \n", + "9 Belgian Second Division 26 February 2012 2 years, 120 days \n", + "10 Belgian Second Division 20 March 2012 2 years, 98 days \n", + "11 Belgian Pro League 30 May 2012 2 years, 27 days \n", + "12 Belgian Second Division 6 July 2012 1 year, 355 days \n", + "13 Belgian Pro League 2 April 2013 1 year, 85 days \n", + "14 Belgian Second Division 29 April 2013 1 year, 58 days \n", + "15 Belgian Pro League 14 May 2013 1 year, 43 days \n", + "16 Belgian Second Division 24 May 2013 1 year, 33 days \n", + "17 Belgian Pro League 27 May 2013 1 year, 30 days \n", + "18 Belgian Second Division 29 May 2013 1 year, 28 days \n", + "19 Belgian Second Division 31 May 2013 1 year, 26 days \n", + "20 Belgian Pro League 1 June 2013 1 year, 25 days \n", + "21 Belgian Second Division 1 June 2013 1 year, 25 days \n", + "22 Belgian Pro League 21 September 2013 0 years, 278 days \n", + "23 Belgian Second Division 21 September 2013 0 years, 278 days \n", + "24 Belgian Second Division 24 September 2013 0 years, 275 days \n", + "25 Belgian Pro League 27 September 2013 0 years, 272 days \n", + "26 Belgian Pro League 1 October 2013 0 years, 268 days \n", + "27 Belgian Pro League 5 November 2013 0 years, 233 days \n", + "28 Belgian Second Division 19 December 2013 0 years, 189 days \n", + "29 Belgian Pro League 5 January 2014 0 years, 172 days \n", + "30 Belgian Second Division 30 January 2014 0 years, 147 days \n", + "31 Belgian Second Division 5 February 2014 0 years, 141 days \n", + "32 Belgian Pro League 24 February 2014 0 years, 122 days \n", + "33 Belgian Pro League 25 February 2014 0 years, 121 days \n", + "\n", + " 
Date of birth_datetime \n", + "0 1956-10-15 \n", + "1 1964-06-01 \n", + "2 1964-02-16 \n", + "3 1977-10-10 \n", + "4 1972-06-04 \n", + "5 1974-01-23 \n", + "6 1976-09-01 \n", + "7 1955-04-25 \n", + "8 1957-10-11 \n", + "9 1966-05-02 \n", + "10 NaT \n", + "11 1966-10-04 \n", + "12 1962-01-07 \n", + "13 1964-04-30 \n", + "14 1962-12-16 \n", + "15 1963-10-15 \n", + "16 1980-09-24 \n", + "17 1975-08-07 \n", + "18 1972-03-27 \n", + "19 NaT \n", + "20 1966-03-12 \n", + "21 1973-07-18 \n", + "22 1959-01-24 \n", + "23 1980-02-02 \n", + "24 1961-06-07 \n", + "25 1961-07-03 \n", + "26 1962-04-09 \n", + "27 1974-01-10 \n", + "28 NaT \n", + "29 1956-10-28 \n", + "30 1967-08-10 \n", + "31 1947-09-14 \n", + "32 1967-06-19 \n", + "33 1978-02-07 \n", + "\n", + "==================================================\n", + "\n", + "=== Таблица 18/20 (Индекс в train: 488) ===\n", + "Оригинальная таблица: csv/203-csv/857.csv\n", + "Размер: 7 строк, 12 колонок\n", + "--------------------------------------------------\n", + "Оригинальная таблица:\n", + " Denomination 1870 1880 1890 1900 1910 1920 1930 1941 \\\n", + "0 Roman Catholic 72.3% 69.4% 64.7% 60.7% 59.8% 59.1% 60.7% 63.1% \n", + "1 Calvinist 4.8% 6.1% 7.4% 8.9% 9.9% 10.9% 12.1% 13.6% \n", + "2 Lutheran 5.3% 5.5% 5.6% 5.3% 4.9% 4.8% 5% 5.3% \n", + "3 Jewish 16.6% 19.7% 21% 23.6% 23.1% 23.2% 20.3% 15.8% \n", + "4 Others 1% 1.3% 1.3% 1.5% 2.2% 2% 1.9% 1.6% \n", + "5 Without religion 0% 0% 0% 0% 0% 0% 0% 0% \n", + "6 No answer 0% 0% 0% 0% 0% 0% 0% 0% \n", + "\n", + " 1949 2001 2011 \n", + "0 69.8% 45.8% 28.9% \n", + "1 15.5% 12.6% 8.5% \n", + "2 5.4% 2.6% 1.7% \n", + "3 6.4% 0.5% 0.4% \n", + "4 1.4% 3.9% 3.5% \n", + "5 0% 19.5% 22.9% \n", + "6 0% 15.1% 34.1% \n", + "Очищенная таблица:\n", + " Denomination 1870 1880 1890 1900 1910 1920 1930 1941 \\\n", + "0 Roman Catholic 72.3% 69.4% 64.7% 60.7% 59.8% 59.1% 60.7% 63.1% \n", + "1 Calvinist 4.8% 6.1% 7.4% 8.9% 9.9% 10.9% 12.1% 13.6% \n", + "2 Lutheran 5.3% 5.5% 5.6% 5.3% 4.9% 4.8% 5% 5.3% 
\n", + "3 Jewish 16.6% 19.7% 21% 23.6% 23.1% 23.2% 20.3% 15.8% \n", + "4 Others 1% 1.3% 1.3% 1.5% 2.2% 2% 1.9% 1.6% \n", + "5 Without religion 0% 0% 0% 0% 0% 0% 0% 0% \n", + "6 No answer 0% 0% 0% 0% 0% 0% 0% 0% \n", + "\n", + " 1949 2001 2011 \n", + "0 69.8% 45.8% 28.9% \n", + "1 15.5% 12.6% 8.5% \n", + "2 5.4% 2.6% 1.7% \n", + "3 6.4% 0.5% 0.4% \n", + "4 1.4% 3.9% 3.5% \n", + "5 0% 19.5% 22.9% \n", + "6 0% 15.1% 34.1% \n", + "\n", + "==================================================\n", + "\n", + "=== Таблица 19/20 (Индекс в train: 1535) ===\n", + "Оригинальная таблица: csv/204-csv/301.csv\n", + "Размер: 8 строк, 4 колонок\n", + "--------------------------------------------------\n", + "Оригинальная таблица:\n", + " Nationality Name Term as a Deputy Judge \\\n", + "0 Norway Frederik Beichmann 30 January 1920 — 6 December 1930 \n", + "1 China Wang Ch'ung-hui 30 January 1920 — 6 December 1930 \n", + "2 Finland Rafael Erich 15 January 1931 — 1 February 1936 \n", + "3 Portugal Jose Caeiro da Matta 15 January 1931 — 1 February 1936 \n", + "4 Romania Demetre Negulesco 30 January 1920 — 6 December 1930 \n", + "5 Yugoslavia Mileta Novaković 15 January 1931 — 1 February 1936 \n", + "6 Austria Josef Redlich 15 January 1931 — 1 February 1936 \n", + "7 Yugoslavia Mihajlo Jovanović 30 January 1920 — 6 December 1930 \n", + "\n", + " Reason for termination \n", + "0 Not re-elected \n", + "1 Not re-elected \n", + "2 Post abolished \n", + "3 Post abolished \n", + "4 Not re-elected \n", + "5 Post abolished \n", + "6 Post abolished \n", + "7 Not re-elected \n", + "Очищенная таблица:\n", + " Nationality Name Term as a Deputy Judge \\\n", + "0 Norway Frederik Beichmann 30 January 1920 — 6 December 1930 \n", + "1 China Wang Ch'ung-hui 30 January 1920 — 6 December 1930 \n", + "2 Finland Rafael Erich 15 January 1931 — 1 February 1936 \n", + "3 Portugal Jose Caeiro da Matta 15 January 1931 — 1 February 1936 \n", + "4 Romania Demetre Negulesco 30 January 1920 — 6 December 1930 \n", + "5 
Yugoslavia Mileta Novaković 15 January 1931 — 1 February 1936 \n", + "6 Austria Josef Redlich 15 January 1931 — 1 February 1936 \n", + "7 Yugoslavia Mihajlo Jovanović 30 January 1920 — 6 December 1930 \n", + "\n", + " Reason for termination \n", + "0 Not re-elected \n", + "1 Not re-elected \n", + "2 Post abolished \n", + "3 Post abolished \n", + "4 Not re-elected \n", + "5 Post abolished \n", + "6 Post abolished \n", + "7 Not re-elected \n", + "\n", + "==================================================\n", + "\n", + "=== Таблица 20/20 (Индекс в train: 3582) ===\n", + "Оригинальная таблица: csv/204-csv/425.csv\n", + "Размер: 154 строк, 5 колонок\n", + "--------------------------------------------------\n", + "Оригинальная таблица:\n", + " Name Lifetime Nationality \\\n", + "0 Jacques d'Adelswärd-Fersen 1880–1923 French \n", + "1 Charles Coypeau d'Assoucy 1605-1677 French \n", + "2 Torstein Dahle b. 1947 Norwegian \n", + "3 Eva Dahlgren b. 1960 Swedish \n", + "4 Jeffrey Dahmer 1960–1994 American \n", + ".. ... ... ... \n", + "149 Eleonora Duse 1859–1924 Italian \n", + "150 Guillaume Dustan 1965–2005 French \n", + "151 Andrea Dworkin 1946–2005 American \n", + "152 Deborah Dyer b. 1967 English \n", + "153 Elana Dykewomon b. 1949 American \n", + "\n", + " Notable as Notes \n", + "0 Aristocrat and writer G \n", + "1 Musician and burlesque poet G \n", + "2 Politician G \n", + "3 Pop musician L \n", + "4 Serial killer G \n", + ".. ... ... \n", + "149 Actor B \n", + "150 Writer G \n", + "151 Feminist, LGBT rights activist L \n", + "152 Rock musician B \n", + "153 Activist, writer L \n", + "\n", + "[154 rows x 5 columns]\n", + "Очищенная таблица:\n", + " Name Lifetime Nationality \\\n", + "0 Jacques d'Adelswärd-Fersen 1880–1923 French \n", + "1 Charles Coypeau d'Assoucy 1605-1677 French \n", + "2 Torstein Dahle b. 1947 Norwegian \n", + "3 Eva Dahlgren b. 1960 Swedish \n", + "4 Jeffrey Dahmer 1960–1994 American \n", + ".. ... ... ... 
\n", + "149 Eleonora Duse 1859–1924 Italian \n", + "150 Guillaume Dustan 1965–2005 French \n", + "151 Andrea Dworkin 1946–2005 American \n", + "152 Deborah Dyer b. 1967 English \n", + "153 Elana Dykewomon b. 1949 American \n", + "\n", + " Notable as Notes \n", + "0 Aristocrat and writer G \n", + "1 Musician and burlesque poet G \n", + "2 Politician G \n", + "3 Pop musician L \n", + "4 Serial killer G \n", + ".. ... ... \n", + "149 Actor B \n", + "150 Writer G \n", + "151 Feminist, LGBT rights activist L \n", + "152 Rock musician B \n", + "153 Activist, writer L \n", + "\n", + "[154 rows x 5 columns]\n", + "\n", + "==================================================\n", + "\n", + "=== Таблица 21/20 (Индекс в train: 3811) ===\n", + "Оригинальная таблица: csv/203-csv/589.csv\n", + "Размер: 13 строк, 6 колонок\n", + "--------------------------------------------------\n", + "Оригинальная таблица:\n", + " Protein Natural Host \\\n", + "0 Ure2p Saccharomyces cerevisiae \n", + "1 Sup35p Saccharomyces cerevisiae \n", + "2 HET-S Podospora anserina \n", + "3 vacuolar protease B Saccharomyces cerevisiae \n", + "4 MAP kinases Podospora anserina \n", + "5 Rnq1p Saccharomyces cerevisiae \n", + "6 Mca1* Saccharomyces cerevisiae \n", + "7 Swi1 Saccharomyces cerevisiae \n", + "8 Cyc8 Saccharomyces cerevisiae \n", + "9 Mot3 Saccharomyces cerevisiae \n", + "10 Pma1+Std1 Saccharomyces cerevisiae \n", + "11 Sfp1 Saccharomyces cerevisiae \n", + "12 Mod5 Saccharomyces cerevisiae \n", + "\n", + " Normal Function Prion State \\\n", + "0 Nitrogen catabolite repressor [URE3] \n", + "1 Translation termination factor [PSI+] \n", + "2 Regulates heterokaryon incompatibility [Het-s] \n", + "3 death in stationary phase, failure in meiosis [β] \n", + "4 increased pigment, slow growth [C] \n", + "5 Protein template factor [RNQ+],[PIN+] \n", + "6 Putative Yeast Caspase [MCA+] \n", + "7 Chromatin remodeling [SWI+] \n", + "8 Transcriptional repressor [OCT+] \n", + "9 Nuclear transcription factor [MOT3+] 
\n", + "10 Pma1 = major plasma membrane proton pump, Std1... [GAR+] \n", + "11 Global transcriptional regulator [ISP+] \n", + "12 NaN [MOD+] \n", + "\n", + " Prion Phenotype Year Identified \n", + "0 Growth on poor nitrogen sources 1994 \n", + "1 Increased levels of nonsense suppression 1994 \n", + "2 Heterokaryon formation between incompatible st... 1997 \n", + "3 failure to degrade cellular proteins under N s... 2003 \n", + "4 NaN 2006 \n", + "5 Promotes aggregation of other prions 2008 \n", + "6 Unknown 2008 \n", + "7 Poor growth on some carbon sources 2008 \n", + "8 Transcriptional derepression of multiple genes 2009 \n", + "9 Transcriptional derepression of anaerobic genes 2009 \n", + "10 Resistant to glucose-associated repression 2009 \n", + "11 Antisuppressor of certain sup35 mutations 2010 \n", + "12 NaN 2012 \n", + "Очищенная таблица:\n", + " Protein Natural Host \\\n", + "0 Ure2p Saccharomyces cerevisiae \n", + "1 Sup35p Saccharomyces cerevisiae \n", + "2 HET-S Podospora anserina \n", + "3 vacuolar protease B Saccharomyces cerevisiae \n", + "4 MAP kinases Podospora anserina \n", + "5 Rnq1p Saccharomyces cerevisiae \n", + "6 Mca1* Saccharomyces cerevisiae \n", + "7 Swi1 Saccharomyces cerevisiae \n", + "8 Cyc8 Saccharomyces cerevisiae \n", + "9 Mot3 Saccharomyces cerevisiae \n", + "10 Pma1+Std1 Saccharomyces cerevisiae \n", + "11 Sfp1 Saccharomyces cerevisiae \n", + "12 Mod5 Saccharomyces cerevisiae \n", + "\n", + " Normal Function Prion State \\\n", + "0 Nitrogen catabolite repressor [URE3] \n", + "1 Translation termination factor [PSI+] \n", + "2 Regulates heterokaryon incompatibility [Het-s] \n", + "3 death in stationary phase, failure in meiosis [β] \n", + "4 increased pigment, slow growth [C] \n", + "5 Protein template factor [RNQ+],[PIN+] \n", + "6 Putative Yeast Caspase [MCA+] \n", + "7 Chromatin remodeling [SWI+] \n", + "8 Transcriptional repressor [OCT+] \n", + "9 Nuclear transcription factor [MOT3+] \n", + "10 Pma1 = major plasma membrane proton 
pump, Std1... [GAR+] \n", + "11 Global transcriptional regulator [ISP+] \n", + "12 NaN [MOD+] \n", + "\n", + " Prion Phenotype Year Identified \n", + "0 Growth on poor nitrogen sources 1994 \n", + "1 Increased levels of nonsense suppression 1994 \n", + "2 Heterokaryon formation between incompatible st... 1997 \n", + "3 failure to degrade cellular proteins under N s... 2003 \n", + "4 NaN 2006 \n", + "5 Promotes aggregation of other prions 2008 \n", + "6 Unknown 2008 \n", + "7 Poor growth on some carbon sources 2008 \n", + "8 Transcriptional derepression of multiple genes 2009 \n", + "9 Transcriptional derepression of anaerobic genes 2009 \n", + "10 Resistant to glucose-associated repression 2009 \n", + "11 Antisuppressor of certain sup35 mutations 2010 \n", + "12 NaN 2012 \n", + "\n", + "==================================================\n", + "\n", + "=== Таблица 22/20 (Индекс в train: 8279) ===\n", + "Оригинальная таблица: csv/203-csv/580.csv\n", + "Размер: 36 строк, 7 колонок\n", + "--------------------------------------------------\n", + "Оригинальная таблица:\n", + " Title Release \\\n", + "0 Bionicle Heroes 2006 \n", + "1 Bionicle: Matoran Adventures 2002 \n", + "2 Bionicle: Maze of Shadows 2005 \n", + "3 Bionicle: Tales of the Tohunga 2001 \n", + "4 Bionicle: The Game 2003 \n", + "5 Drome Racers 2002 \n", + "6 Island Xtreme Stunts 2002 \n", + "7 Lego Alpha Team 2000 \n", + "8 Lego Battles 2009 \n", + "9 Lego Battles: Ninjago 2011 \n", + "10 Lego Chess 1998 \n", + "11 Lego City Undercover 2013 \n", + "12 Lego City Undercover: The Chase Begins 2013 \n", + "13 Lego Creationary 2011 \n", + "14 Lego Creator 1998 \n", + "15 Lego Creator: Knights Kingdom 2000 \n", + "16 Lego Friends 2013 \n", + "17 Lego Island 1997 \n", + "18 Lego Island 2 The Brickster's Revenge 2001 \n", + "19 Lego Knights' Kingdom 2004 \n", + "20 Lego Legends of Chima Online 2014 \n", + "21 Lego Legends of Chima: Laval's Journey 2013 \n", + "22 Lego Legends of Chima: Speedorz 2013 \n", + "23 Lego 
Loco 1998 \n", + "24 Lego Minifigures Online 2014 \n", + "25 Lego My Style Kindergarten 2000 \n", + "26 Lego My Style Preschool 2000 \n", + "27 Lego Ninjago Nindroids 2014 \n", + "28 Lego Ninjago: The Final Battle 2013 \n", + "29 Lego Racers 1999 \n", + "30 Lego Racers 2 2001 \n", + "31 Lego Rock Raiders 1999 \n", + "32 Lego Soccer Mania 2002 \n", + "33 Lego Stunt Rally 2000 \n", + "34 Lego Universe 2010 \n", + "35 Legoland 1999 \n", + "\n", + " Genre Computer \\\n", + "0 Third-person shooter Windows \n", + "1 Platformer — \n", + "2 Turn Based RPG — \n", + "3 Adventure — \n", + "4 Adventure Windows, OS X \n", + "5 Racing Windows \n", + "6 Action-adventure Windows \n", + "7 Action Windows \n", + "8 Real-time strategy — \n", + "9 Real-time strategy — \n", + "10 Strategy Windows \n", + "11 Open world, Platform game — \n", + "12 Open world, Platform game — \n", + "13 Puzzle — \n", + "14 Construction and management simulation Windows \n", + "15 Construction and management simulation Windows \n", + "16 Miscellaneous — \n", + "17 Action-adventure Windows \n", + "18 Action-adventure Windows \n", + "19 Action-adventure — \n", + "20 Massively multiplayer online (MMO) Windows \n", + "21 Action-adventure — \n", + "22 Racing Windows \n", + "23 Virtual world Windows \n", + "24 Massively multiplayer online (MMO) Windows \n", + "25 Educational Windows, Mac OS \n", + "26 Educational Windows, Mac OS \n", + "27 Action-adventure — \n", + "28 Platformer Windows \n", + "29 Racing Windows \n", + "30 Racing Windows \n", + "31 Real-time strategy, Action Windows \n", + "32 Sport Windows \n", + "33 Racing Windows \n", + "34 Massively multiplayer online (MMO) Windows, OS X \n", + "35 Simulation strategy Windows \n", + "\n", + " Console \\\n", + "0 PlayStation 2, GameCube, Xbox, Xbox 360, Wii \n", + "1 — \n", + "2 — \n", + "3 — \n", + "4 PlayStation 2, GameCube, Xbox \n", + "5 PlayStation 2, GameCube, Xbox \n", + "6 PlayStation 2 \n", + "7 — \n", + "8 — \n", + "9 — \n", + "10 — \n", + "11 Wii 
U \n", + "12 — \n", + "13 — \n", + "14 — \n", + "15 — \n", + "16 — \n", + "17 — \n", + "18 PlayStation \n", + "19 — \n", + "20 — \n", + "21 — \n", + "22 — \n", + "23 — \n", + "24 — \n", + "25 — \n", + "26 — \n", + "27 — \n", + "28 — \n", + "29 PlayStation, Nintendo 64 \n", + "30 PlayStation 2 \n", + "31 PlayStation \n", + "32 PlayStation 2 \n", + "33 — \n", + "34 — \n", + "35 — \n", + "\n", + " Handheld Smartphone \n", + "0 Nintendo DS — \n", + "1 Game Boy Advance — \n", + "2 Game Boy Advance — \n", + "3 Game Boy Advance — \n", + "4 Game Boy Advance — \n", + "5 Game Boy Advance — \n", + "6 Game Boy Advance — \n", + "7 Game Boy Color — \n", + "8 Nintendo DS — \n", + "9 Nintendo DS iOS \n", + "10 — — \n", + "11 — — \n", + "12 Nintendo 3DS — \n", + "13 — iOS, Android \n", + "14 — — \n", + "15 — — \n", + "16 Nintendo DS, Nintendo 3DS — \n", + "17 — — \n", + "18 Game Boy Color, Game Boy Advance — \n", + "19 Game Boy Advance — \n", + "20 — — \n", + "21 Nintendo DS, Nintendo 3DS, PlayStation Vita — \n", + "22 — iOS \n", + "23 — — \n", + "24 — iOS, Android \n", + "25 — — \n", + "26 — — \n", + "27 Nintendo 3DS, PlayStation Vita — \n", + "28 — — \n", + "29 Game Boy Color — \n", + "30 Game Boy Advance — \n", + "31 — — \n", + "32 Game Boy Advance — \n", + "33 Game Boy Color — \n", + "34 — — \n", + "35 — — \n", + "Очищенная таблица:\n", + " Title Release \\\n", + "0 Bionicle Heroes 2006 \n", + "1 Bionicle: Matoran Adventures 2002 \n", + "2 Bionicle: Maze of Shadows 2005 \n", + "3 Bionicle: Tales of the Tohunga 2001 \n", + "4 Bionicle: The Game 2003 \n", + "5 Drome Racers 2002 \n", + "6 Island Xtreme Stunts 2002 \n", + "7 Lego Alpha Team 2000 \n", + "8 Lego Battles 2009 \n", + "9 Lego Battles: Ninjago 2011 \n", + "10 Lego Chess 1998 \n", + "11 Lego City Undercover 2013 \n", + "12 Lego City Undercover: The Chase Begins 2013 \n", + "13 Lego Creationary 2011 \n", + "14 Lego Creator 1998 \n", + "15 Lego Creator: Knights Kingdom 2000 \n", + "16 Lego Friends 2013 \n", + "17 Lego 
Island 1997 \n", + "18 Lego Island 2 The Brickster's Revenge 2001 \n", + "19 Lego Knights' Kingdom 2004 \n", + "20 Lego Legends of Chima Online 2014 \n", + "21 Lego Legends of Chima: Laval's Journey 2013 \n", + "22 Lego Legends of Chima: Speedorz 2013 \n", + "23 Lego Loco 1998 \n", + "24 Lego Minifigures Online 2014 \n", + "25 Lego My Style Kindergarten 2000 \n", + "26 Lego My Style Preschool 2000 \n", + "27 Lego Ninjago Nindroids 2014 \n", + "28 Lego Ninjago: The Final Battle 2013 \n", + "29 Lego Racers 1999 \n", + "30 Lego Racers 2 2001 \n", + "31 Lego Rock Raiders 1999 \n", + "32 Lego Soccer Mania 2002 \n", + "33 Lego Stunt Rally 2000 \n", + "34 Lego Universe 2010 \n", + "35 Legoland 1999 \n", + "\n", + " Genre Computer \\\n", + "0 Third-person shooter Windows \n", + "1 Platformer NaN \n", + "2 Turn Based RPG NaN \n", + "3 Adventure NaN \n", + "4 Adventure Windows, OS X \n", + "5 Racing Windows \n", + "6 Action-adventure Windows \n", + "7 Action Windows \n", + "8 Real-time strategy NaN \n", + "9 Real-time strategy NaN \n", + "10 Strategy Windows \n", + "11 Open world, Platform game NaN \n", + "12 Open world, Platform game NaN \n", + "13 Puzzle NaN \n", + "14 Construction and management simulation Windows \n", + "15 Construction and management simulation Windows \n", + "16 Miscellaneous NaN \n", + "17 Action-adventure Windows \n", + "18 Action-adventure Windows \n", + "19 Action-adventure NaN \n", + "20 Massively multiplayer online (MMO) Windows \n", + "21 Action-adventure NaN \n", + "22 Racing Windows \n", + "23 Virtual world Windows \n", + "24 Massively multiplayer online (MMO) Windows \n", + "25 Educational Windows, Mac OS \n", + "26 Educational Windows, Mac OS \n", + "27 Action-adventure NaN \n", + "28 Platformer Windows \n", + "29 Racing Windows \n", + "30 Racing Windows \n", + "31 Real-time strategy, Action Windows \n", + "32 Sport Windows \n", + "33 Racing Windows \n", + "34 Massively multiplayer online (MMO) Windows, OS X \n", + "35 Simulation strategy 
Windows \n", + "\n", + " Console \\\n", + "0 PlayStation 2, GameCube, Xbox, Xbox 360, Wii \n", + "1 NaN \n", + "2 NaN \n", + "3 NaN \n", + "4 PlayStation 2, GameCube, Xbox \n", + "5 PlayStation 2, GameCube, Xbox \n", + "6 PlayStation 2 \n", + "7 NaN \n", + "8 NaN \n", + "9 NaN \n", + "10 NaN \n", + "11 Wii U \n", + "12 NaN \n", + "13 NaN \n", + "14 NaN \n", + "15 NaN \n", + "16 NaN \n", + "17 NaN \n", + "18 PlayStation \n", + "19 NaN \n", + "20 NaN \n", + "21 NaN \n", + "22 NaN \n", + "23 NaN \n", + "24 NaN \n", + "25 NaN \n", + "26 NaN \n", + "27 NaN \n", + "28 NaN \n", + "29 PlayStation, Nintendo 64 \n", + "30 PlayStation 2 \n", + "31 PlayStation \n", + "32 PlayStation 2 \n", + "33 NaN \n", + "34 NaN \n", + "35 NaN \n", + "\n", + " Handheld Smartphone \n", + "0 Nintendo DS NaN \n", + "1 Game Boy Advance NaN \n", + "2 Game Boy Advance NaN \n", + "3 Game Boy Advance NaN \n", + "4 Game Boy Advance NaN \n", + "5 Game Boy Advance NaN \n", + "6 Game Boy Advance NaN \n", + "7 Game Boy Color NaN \n", + "8 Nintendo DS NaN \n", + "9 Nintendo DS iOS \n", + "10 NaN NaN \n", + "11 NaN NaN \n", + "12 Nintendo 3DS NaN \n", + "13 NaN iOS, Android \n", + "14 NaN NaN \n", + "15 NaN NaN \n", + "16 Nintendo DS, Nintendo 3DS NaN \n", + "17 NaN NaN \n", + "18 Game Boy Color, Game Boy Advance NaN \n", + "19 Game Boy Advance NaN \n", + "20 NaN NaN \n", + "21 Nintendo DS, Nintendo 3DS, PlayStation Vita NaN \n", + "22 NaN iOS \n", + "23 NaN NaN \n", + "24 NaN iOS, Android \n", + "25 NaN NaN \n", + "26 NaN NaN \n", + "27 Nintendo 3DS, PlayStation Vita NaN \n", + "28 NaN NaN \n", + "29 Game Boy Color NaN \n", + "30 Game Boy Advance NaN \n", + "31 NaN NaN \n", + "32 Game Boy Advance NaN \n", + "33 Game Boy Color NaN \n", + "34 NaN NaN \n", + "35 NaN NaN \n", + "\n", + "==================================================\n", + "\n", + "=== Таблица 23/20 (Индекс в train: 9863) ===\n", + "Оригинальная таблица: csv/203-csv/139.csv\n", + "Размер: 16 строк, 6 колонок\n", + 
"--------------------------------------------------\n", + "Оригинальная таблица:\n", + " Name Abbr. \\\n", + "0 Serbian Progressive Party\\nСрпска напредна стр... SNS \n", + "1 Socialist Party of Serbia\\nСоцијалистичка парт... SPS \n", + "2 Democratic Party\\nДемократска странка / Demokr... DS \n", + "3 Party of United Pensioners of Serbia\\nПартија ... PUPS \n", + "4 New Democratic Party\\nНова демократска странка... NDS \n", + "5 Social Democratic Party of Serbia\\nСоцијалдемо... SDPS \n", + "6 United Serbia\\nЈединствена Србија/ Jedinstvena... JS \n", + "7 League of Social Democrats of Vojvodina\\nЛига ... LSV \n", + "8 New Serbia\\nНова Србија/ Nova Srbija NS \n", + "9 Alliance of Vojvodina Hungarians\\nСавез војвођ... SVM \n", + "10 Serbian Renewal Movement\\nСрпски покрет обнове... SPO \n", + "11 Movement of Socialists\\nПокрет социјалиста/ Po... PS \n", + "12 Party of Democratic Action of Sandžak\\nСтранка... SDA \n", + "13 Together for Serbia\\nЗаједно за Србију / Zajed... ZZS \n", + "14 New Party\\nНова странка / Nova stranka NOVA \n", + "15 Party for Democratic Action\\nПартија за демокр... 
PDD \n", + "\n", + " Leader Ideology \\\n", + "0 Aleksandar Vučić Conservatism \n", + "1 Ivica Dačić Socialism \n", + "2 Dragan Đilas Social democracy, Third Way \n", + "3 Jovan Krkobabić Pensioners' interests \n", + "4 Boris Tadić Social democracy, Green politics \n", + "5 Rasim Ljajić Social democracy \n", + "6 Dragan Marković Populism \n", + "7 Nenad Čanak Social democracy, Regionalism \n", + "8 Velimir Ilić Populism \n", + "9 István Pásztor Hungarian minority politics \n", + "10 Vuk Drašković Monarchism, Pro-Europeanism \n", + "11 Aleksandar Vulin Socialism, Marxism \n", + "12 Sulejman Ugljanin Bosniak minority politics \n", + "13 Dušan Petrović Social democracy \n", + "14 Zoran Živković Liberalism, Pro-Europeanism \n", + "15 Riza Halimi Albanian minority politics \n", + "\n", + " Political position MPs \n", + "0 Centre-right 134 / 250 \n", + "1 Centre-left to Left-wing 25 / 250 \n", + "2 Centre-left to Centre 17 / 250 \n", + "3 Left-wing 12 / 250 \n", + "4 Centre-left 10 / 250 \n", + "5 Centre-left 10 / 250 \n", + "6 Right-wing 7 / 250 \n", + "7 Centre-left 6 / 250 \n", + "8 Centre-right 6 / 250 \n", + "9 Minority politics 6 / 250 \n", + "10 Centre-right 5 / 250 \n", + "11 Left-wing to Far-left 3 / 250 \n", + "12 Minority politics 3 / 250 \n", + "13 Centre-left 2 / 250 \n", + "14 Centre 2 / 250 \n", + "15 Minority politics 2 / 250 \n", + "Очищенная таблица:\n", + " Name Abbr. \\\n", + "0 Serbian Progressive Party\\nСрпска напредна стр... SNS \n", + "1 Socialist Party of Serbia\\nСоцијалистичка парт... SPS \n", + "2 Democratic Party\\nДемократска странка / Demokr... DS \n", + "3 Party of United Pensioners of Serbia\\nПартија ... PUPS \n", + "4 New Democratic Party\\nНова демократска странка... NDS \n", + "5 Social Democratic Party of Serbia\\nСоцијалдемо... SDPS \n", + "6 United Serbia\\nЈединствена Србија/ Jedinstvena... JS \n", + "7 League of Social Democrats of Vojvodina\\nЛига ... 
LSV \n", + "8 New Serbia\\nНова Србија/ Nova Srbija NS \n", + "9 Alliance of Vojvodina Hungarians\\nСавез војвођ... SVM \n", + "10 Serbian Renewal Movement\\nСрпски покрет обнове... SPO \n", + "11 Movement of Socialists\\nПокрет социјалиста/ Po... PS \n", + "12 Party of Democratic Action of Sandžak\\nСтранка... SDA \n", + "13 Together for Serbia\\nЗаједно за Србију / Zajed... ZZS \n", + "14 New Party\\nНова странка / Nova stranka NOVA \n", + "15 Party for Democratic Action\\nПартија за демокр... PDD \n", + "\n", + " Leader Ideology \\\n", + "0 Aleksandar Vučić Conservatism \n", + "1 Ivica Dačić Socialism \n", + "2 Dragan Đilas Social democracy, Third Way \n", + "3 Jovan Krkobabić Pensioners' interests \n", + "4 Boris Tadić Social democracy, Green politics \n", + "5 Rasim Ljajić Social democracy \n", + "6 Dragan Marković Populism \n", + "7 Nenad Čanak Social democracy, Regionalism \n", + "8 Velimir Ilić Populism \n", + "9 István Pásztor Hungarian minority politics \n", + "10 Vuk Drašković Monarchism, Pro-Europeanism \n", + "11 Aleksandar Vulin Socialism, Marxism \n", + "12 Sulejman Ugljanin Bosniak minority politics \n", + "13 Dušan Petrović Social democracy \n", + "14 Zoran Živković Liberalism, Pro-Europeanism \n", + "15 Riza Halimi Albanian minority politics \n", + "\n", + " Political position MPs \n", + "0 Centre-right 134 / 250 \n", + "1 Centre-left to Left-wing 25 / 250 \n", + "2 Centre-left to Centre 17 / 250 \n", + "3 Left-wing 12 / 250 \n", + "4 Centre-left 10 / 250 \n", + "5 Centre-left 10 / 250 \n", + "6 Right-wing 7 / 250 \n", + "7 Centre-left 6 / 250 \n", + "8 Centre-right 6 / 250 \n", + "9 Minority politics 6 / 250 \n", + "10 Centre-right 5 / 250 \n", + "11 Left-wing to Far-left 3 / 250 \n", + "12 Minority politics 3 / 250 \n", + "13 Centre-left 2 / 250 \n", + "14 Centre 2 / 250 \n", + "15 Minority politics 2 / 250 \n", + "\n", + "==================================================\n", + "\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + 
"text": [ + "C:\\Users\\PC\\MyProjects\\Semtab\\WTQ\\Qwen\\normalize.py:112: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n", + " clean_date_attempt = df[col].str.replace(r'\\*$', '', regex=True)\n", + "C:\\Users\\PC\\MyProjects\\Semtab\\WTQ\\Qwen\\normalize.py:112: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n", + " clean_date_attempt = df[col].str.replace(r'\\*$', '', regex=True)\n", + "C:\\Users\\PC\\MyProjects\\Semtab\\WTQ\\Qwen\\normalize.py:112: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n", + " clean_date_attempt = df[col].str.replace(r'\\*$', '', regex=True)\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "=== Таблица 24/20 (Индекс в train: 434) ===\n", + "Оригинальная таблица: csv/203-csv/336.csv\n", + "Размер: 27 строк, 9 колонок\n", + "--------------------------------------------------\n", + "Оригинальная таблица:\n", + " # Date Opponent Score Win Loss \\\n", + "0 74 July 1 Mariners 4 - 3 Perry (6-7) Leal (5-6) \n", + "1 75 July 2 @ Twins 9 - 4 Murray (4-3) Havens (3-6) \n", + "2 76 July 3 @ Twins 2 - 1 O'Connor (1-2) Clancy (7-5) \n", + "3 77 July 4 @ Twins 4 - 3 Little (1-0) Stieb (6-10) \n", + "4 78 July 5 @ Rangers 3 - 2 Matlack (4-5) Leal (5-7) \n", + "5 79 July 6 @ Rangers 4 - 3 Murray (5-3) Tanana (4-10) \n", + "6 80 July 7 @ Royals 3 - 1 Gura (10-4) Clancy (7-6) \n", + "7 81 July 8 @ Royals 5 - 4 Stieb (7-10) Armstrong (2-3) \n", + "8 82 July 9 White Sox 7 - 6 Murray (6-3) Dotson (3-10) \n", + "9 83 July 10 White Sox 6 - 5 Escárrega (1-1) Gott (1-5) \n", + "10 84 July 11 White Sox 16 - 7 Burns (9-4) Clancy 
(7-7) \n", + "11 85 July 15 Rangers 5 - 1 Stieb (8-10) Honeycutt (4-10) \n", + "12 86 July 16 Rangers 6 - 0 Clancy (8-7) Hough (7-8) \n", + "13 87 July 17 Rangers 11 - 3 Jackson (3-6) Butcher (0-1) \n", + "14 88 July 18 Rangers 5 - 4 (10) McLaughlin (8-3) Darwin (6-4) \n", + "15 89 July 19 Royals 4 - 2 Leal (6-7) Black (3-3) \n", + "16 90 July 20 Royals 9 - 2 Stieb (9-10) Gura (10-7) \n", + "17 91 July 21 Royals 9 - 7 Blue (7-7) Clancy (8-8) \n", + "18 92 July 22 @ White Sox 3 - 2 Burns (10-4) McLaughlin (8-4) \n", + "19 93 July 23 @ White Sox 7 - 1 Leal (7-7) Barnes (0-2) \n", + "20 94 July 24 @ White Sox 8 - 1 Stieb (10-10) Lamp (7-5) \n", + "21 95 July 25 @ White Sox 5 - 3 Hoyt (12-9) Clancy (8-9) \n", + "22 96 July 26 @ Red Sox 3 - 2 Eckersley (11-8) Gott (1-6) \n", + "23 97 July 27 @ Red Sox 3 - 1 Leal (8-7) Tudor (6-8) \n", + "24 98 July 28 @ Red Sox 9 - 7 Ojeda (4-5) Jackson (3-7) \n", + "25 99 July 30 Tigers 6 - 5 (12) Murray (7-3) James (0-2) \n", + "26 100 July 31 Tigers 1 - 0 (10) Gott (2-6) Rucker (1-1) \n", + "\n", + " Save Attendance Record \n", + "0 Caudill (14) 21,004 33-41 \n", + "1 McLaughlin (3) 7,503 34-41 \n", + "2 NaN 9,591 34-42 \n", + "3 NaN 6,532 34-43 \n", + "4 Darwin (4) 29,126 34-44 \n", + "5 NaN 9,657 35-44 \n", + "6 NaN 22,217 35-45 \n", + "7 McLaughlin (4) 24,409 36-45 \n", + "8 McLaughlin (5) 15,131 37-45 \n", + "9 Hickey (4) 17,035 37-46 \n", + "10 NaN 16,169 37-47 \n", + "11 NaN 14,123 38-47 \n", + "12 NaN 13,359 39-47 \n", + "13 NaN 17,080 40-47 \n", + "14 NaN 15,512 41-47 \n", + "15 NaN 16,466 42-47 \n", + "16 NaN 18,552 43-47 \n", + "17 Quisenberry (23) 19,152 43-48 \n", + "18 NaN 21,875 43-49 \n", + "19 Jackson (5) 27,770 44-49 \n", + "20 Murray (1) 21,821 45-49 \n", + "21 NaN 17,452 45-50 \n", + "22 NaN 22,261 45-51 \n", + "23 NaN 27,077 46-51 \n", + "24 Stanley (8) 18,627 46-52 \n", + "25 NaN 18,262 47-52 \n", + "26 NaN 21,007 48-52 \n", + "Очищенная таблица:\n", + " # Date Opponent Score Win Loss \\\n", + "0 74 July 1 
Mariners 4 - 3 Perry (6-7) Leal (5-6) \n", + "1 75 July 2 @ Twins 9 - 4 Murray (4-3) Havens (3-6) \n", + "2 76 July 3 @ Twins 2 - 1 O'Connor (1-2) Clancy (7-5) \n", + "3 77 July 4 @ Twins 4 - 3 Little (1-0) Stieb (6-10) \n", + "4 78 July 5 @ Rangers 3 - 2 Matlack (4-5) Leal (5-7) \n", + "5 79 July 6 @ Rangers 4 - 3 Murray (5-3) Tanana (4-10) \n", + "6 80 July 7 @ Royals 3 - 1 Gura (10-4) Clancy (7-6) \n", + "7 81 July 8 @ Royals 5 - 4 Stieb (7-10) Armstrong (2-3) \n", + "8 82 July 9 White Sox 7 - 6 Murray (6-3) Dotson (3-10) \n", + "9 83 July 10 White Sox 6 - 5 Escárrega (1-1) Gott (1-5) \n", + "10 84 July 11 White Sox 16 - 7 Burns (9-4) Clancy (7-7) \n", + "11 85 July 15 Rangers 5 - 1 Stieb (8-10) Honeycutt (4-10) \n", + "12 86 July 16 Rangers 6 - 0 Clancy (8-7) Hough (7-8) \n", + "13 87 July 17 Rangers 11 - 3 Jackson (3-6) Butcher (0-1) \n", + "14 88 July 18 Rangers 5 - 4 (10) McLaughlin (8-3) Darwin (6-4) \n", + "15 89 July 19 Royals 4 - 2 Leal (6-7) Black (3-3) \n", + "16 90 July 20 Royals 9 - 2 Stieb (9-10) Gura (10-7) \n", + "17 91 July 21 Royals 9 - 7 Blue (7-7) Clancy (8-8) \n", + "18 92 July 22 @ White Sox 3 - 2 Burns (10-4) McLaughlin (8-4) \n", + "19 93 July 23 @ White Sox 7 - 1 Leal (7-7) Barnes (0-2) \n", + "20 94 July 24 @ White Sox 8 - 1 Stieb (10-10) Lamp (7-5) \n", + "21 95 July 25 @ White Sox 5 - 3 Hoyt (12-9) Clancy (8-9) \n", + "22 96 July 26 @ Red Sox 3 - 2 Eckersley (11-8) Gott (1-6) \n", + "23 97 July 27 @ Red Sox 3 - 1 Leal (8-7) Tudor (6-8) \n", + "24 98 July 28 @ Red Sox 9 - 7 Ojeda (4-5) Jackson (3-7) \n", + "25 99 July 30 Tigers 6 - 5 (12) Murray (7-3) James (0-2) \n", + "26 100 July 31 Tigers 1 - 0 (10) Gott (2-6) Rucker (1-1) \n", + "\n", + " Save Attendance Record Date_datetime Win_base Win_meta \\\n", + "0 Caudill (14) 21004 33-41 1-07-01 Perry 6-7 \n", + "1 McLaughlin (3) 7503 34-41 1-07-02 Murray 4-3 \n", + "2 NaN 9591 34-42 1-07-03 O'Connor 1-2 \n", + "3 NaN 6532 34-43 1-07-04 Little 1-0 \n", + "4 Darwin (4) 29126 34-44 1-07-05 
Matlack 4-5 \n", + "5 NaN 9657 35-44 1-07-06 Murray 5-3 \n", + "6 NaN 22217 35-45 1-07-07 Gura 10-4 \n", + "7 McLaughlin (4) 24409 36-45 1-07-08 Stieb 7-10 \n", + "8 McLaughlin (5) 15131 37-45 1-07-09 Murray 6-3 \n", + "9 Hickey (4) 17035 37-46 1-07-10 Escárrega 1-1 \n", + "10 NaN 16169 37-47 1-07-11 Burns 9-4 \n", + "11 NaN 14123 38-47 1-07-15 Stieb 8-10 \n", + "12 NaN 13359 39-47 1-07-16 Clancy 8-7 \n", + "13 NaN 17080 40-47 1-07-17 Jackson 3-6 \n", + "14 NaN 15512 41-47 1-07-18 McLaughlin 8-3 \n", + "15 NaN 16466 42-47 1-07-19 Leal 6-7 \n", + "16 NaN 18552 43-47 1-07-20 Stieb 9-10 \n", + "17 Quisenberry (23) 19152 43-48 1-07-21 Blue 7-7 \n", + "18 NaN 21875 43-49 1-07-22 Burns 10-4 \n", + "19 Jackson (5) 27770 44-49 1-07-23 Leal 7-7 \n", + "20 Murray (1) 21821 45-49 1-07-24 Stieb 10-10 \n", + "21 NaN 17452 45-50 1-07-25 Hoyt 12-9 \n", + "22 NaN 22261 45-51 1-07-26 Eckersley 11-8 \n", + "23 NaN 27077 46-51 1-07-27 Leal 8-7 \n", + "24 Stanley (8) 18627 46-52 1-07-28 Ojeda 4-5 \n", + "25 NaN 18262 47-52 1-07-30 Murray 7-3 \n", + "26 NaN 21007 48-52 1-07-31 Gott 2-6 \n", + "\n", + " Loss_base Loss_meta Save_base Save_meta Record_score1 Record_score2 \n", + "0 Leal 5-6 Caudill 14 33 41 \n", + "1 Havens 3-6 McLaughlin 3 34 41 \n", + "2 Clancy 7-5 NaN NaN 34 42 \n", + "3 Stieb 6-10 NaN NaN 34 43 \n", + "4 Leal 5-7 Darwin 4 34 44 \n", + "5 Tanana 4-10 NaN NaN 35 44 \n", + "6 Clancy 7-6 NaN NaN 35 45 \n", + "7 Armstrong 2-3 McLaughlin 4 36 45 \n", + "8 Dotson 3-10 McLaughlin 5 37 45 \n", + "9 Gott 1-5 Hickey 4 37 46 \n", + "10 Clancy 7-7 NaN NaN 37 47 \n", + "11 Honeycutt 4-10 NaN NaN 38 47 \n", + "12 Hough 7-8 NaN NaN 39 47 \n", + "13 Butcher 0-1 NaN NaN 40 47 \n", + "14 Darwin 6-4 NaN NaN 41 47 \n", + "15 Black 3-3 NaN NaN 42 47 \n", + "16 Gura 10-7 NaN NaN 43 47 \n", + "17 Clancy 8-8 Quisenberry 23 43 48 \n", + "18 McLaughlin 8-4 NaN NaN 43 49 \n", + "19 Barnes 0-2 Jackson 5 44 49 \n", + "20 Lamp 7-5 Murray 1 45 49 \n", + "21 Clancy 8-9 NaN NaN 45 50 \n", + "22 Gott 
1-6 NaN NaN 45 51 \n", + "23 Tudor 6-8 NaN NaN 46 51 \n", + "24 Jackson 3-7 Stanley 8 46 52 \n", + "25 James 0-2 NaN NaN 47 52 \n", + "26 Rucker 1-1 NaN NaN 48 52 \n", + "\n", + "==================================================\n", + "\n", + "=== Таблица 25/20 (Индекс в train: 9195) ===\n", + "Оригинальная таблица: csv/202-csv/38.csv\n", + "Размер: 15 строк, 4 колонок\n", + "--------------------------------------------------\n", + "Оригинальная таблица:\n", + " Year \\\n", + "1995 New York Undercover Herself \\Private Enemy No. 1\\\" (episode 14 \n", + "1998 The Jamie Foxx Show Ola Mae \\Papa Don't Preach\\\" (episode 14 \n", + "1999 Moesha Herself \\Good Vibrations?\\\" (episode 1 \n", + "2001 Angel: One More Road to Cross Guardian Angel Direct to DVD \n", + " Prison Song Mrs. Butler Main Role \n", + " Strong Medicine Simone Fellows \\History\\\" (episode 4 \n", + "2007 Ghost Whisperer Jackie Boyd \\Mean Ghost\\\" (episode 15 \n", + " Entourage Herself \\Gary's Desk\\\" (episode 8 \n", + "2009 I Can Do Bad All By Myself Tanya Supporting Role \n", + " 30 Rock Herself Guest \n", + "2010 American Idol Guest judge/Herself Auditions were held in Atlanta, Georgia at the... \n", + "2012 Rock of Ages Justice Charlier NaN \n", + "2013 Betty and Coretta Dr. Betty Shabazz An original Lifetime movie \n", + " The X Factor Guest judge/herself Blige assisted Nicole Scherzinger at her judge... \n", + " Black Nativity Platinum Fro NaN \n", + "\n", + " Title \\\n", + "1995 New York Undercover Herself season 1) \n", + "1998 The Jamie Foxx Show Ola Mae season 2)\" \n", + "1999 Moesha Herself season 5)\" \n", + "2001 Angel: One More Road to Cross Guardian Angel NaN \n", + " Prison Song Mrs. 
Butler NaN \n", + " Strong Medicine Simone Fellows season 2)\" \n", + "2007 Ghost Whisperer Jackie Boyd season 2)\" \n", + " Entourage Herself season 4)\" \n", + "2009 I Can Do Bad All By Myself Tanya NaN \n", + " 30 Rock Herself NaN \n", + "2010 American Idol Guest judge/Herself NaN \n", + "2012 Rock of Ages Justice Charlier NaN \n", + "2013 Betty and Coretta Dr. Betty Shabazz NaN \n", + " The X Factor Guest judge/herself NaN \n", + " Black Nativity Platinum Fro NaN \n", + "\n", + " Role \\\n", + "1995 New York Undercover Herself \\\"Tag You're Dead\\\" (episode 2 \n", + "1998 The Jamie Foxx Show Ola Mae NaN \n", + "1999 Moesha Herself NaN \n", + "2001 Angel: One More Road to Cross Guardian Angel NaN \n", + " Prison Song Mrs. Butler NaN \n", + " Strong Medicine Simone Fellows NaN \n", + "2007 Ghost Whisperer Jackie Boyd NaN \n", + " Entourage Herself NaN \n", + "2009 I Can Do Bad All By Myself Tanya NaN \n", + " 30 Rock Herself NaN \n", + "2010 American Idol Guest judge/Herself NaN \n", + "2012 Rock of Ages Justice Charlier NaN \n", + "2013 Betty and Coretta Dr. Betty Shabazz NaN \n", + " The X Factor Guest judge/herself NaN \n", + " Black Nativity Platinum Fro NaN \n", + "\n", + " Notes \n", + "1995 New York Undercover Herself season 2) [music performance]\" \n", + "1998 The Jamie Foxx Show Ola Mae NaN \n", + "1999 Moesha Herself NaN \n", + "2001 Angel: One More Road to Cross Guardian Angel NaN \n", + " Prison Song Mrs. Butler NaN \n", + " Strong Medicine Simone Fellows NaN \n", + "2007 Ghost Whisperer Jackie Boyd NaN \n", + " Entourage Herself NaN \n", + "2009 I Can Do Bad All By Myself Tanya NaN \n", + " 30 Rock Herself NaN \n", + "2010 American Idol Guest judge/Herself NaN \n", + "2012 Rock of Ages Justice Charlier NaN \n", + "2013 Betty and Coretta Dr. Betty Shabazz NaN \n", + " The X Factor Guest judge/herself NaN \n", + " Black Nativity Platinum Fro NaN \n", + "Очищенная таблица:\n", + " Year \\\n", + "1995 New York Undercover Herself \\Private Enemy No. 
1\\\" (episode 14 \n", + "1998 The Jamie Foxx Show Ola Mae \\Papa Don't Preach\\\" (episode 14 \n", + "1999 Moesha Herself \\Good Vibrations?\\\" (episode 1 \n", + "2001 Angel: One More Road to Cross Guardian Angel Direct to DVD \n", + " Prison Song Mrs. Butler Main Role \n", + " Strong Medicine Simone Fellows \\History\\\" (episode 4 \n", + "2007 Ghost Whisperer Jackie Boyd \\Mean Ghost\\\" (episode 15 \n", + " Entourage Herself \\Gary's Desk\\\" (episode 8 \n", + "2009 I Can Do Bad All By Myself Tanya Supporting Role \n", + " 30 Rock Herself Guest \n", + "2010 American Idol Guest judge/Herself Auditions were held in Atlanta, Georgia at the... \n", + "2012 Rock of Ages Justice Charlier NaN \n", + "2013 Betty and Coretta Dr. Betty Shabazz An original Lifetime movie \n", + " The X Factor Guest judge/herself Blige assisted Nicole Scherzinger at her judge... \n", + " Black Nativity Platinum Fro NaN \n", + "\n", + " Title \\\n", + "1995 New York Undercover Herself season 1) \n", + "1998 The Jamie Foxx Show Ola Mae season 2)\" \n", + "1999 Moesha Herself season 5)\" \n", + "2001 Angel: One More Road to Cross Guardian Angel NaN \n", + " Prison Song Mrs. Butler NaN \n", + " Strong Medicine Simone Fellows season 2)\" \n", + "2007 Ghost Whisperer Jackie Boyd season 2)\" \n", + " Entourage Herself season 4)\" \n", + "2009 I Can Do Bad All By Myself Tanya NaN \n", + " 30 Rock Herself NaN \n", + "2010 American Idol Guest judge/Herself NaN \n", + "2012 Rock of Ages Justice Charlier NaN \n", + "2013 Betty and Coretta Dr. Betty Shabazz NaN \n", + " The X Factor Guest judge/herself NaN \n", + " Black Nativity Platinum Fro NaN \n", + "\n", + " Role \\\n", + "1995 New York Undercover Herself \\\"Tag You're Dead\\\" (episode 2 \n", + "1998 The Jamie Foxx Show Ola Mae NaN \n", + "1999 Moesha Herself NaN \n", + "2001 Angel: One More Road to Cross Guardian Angel NaN \n", + " Prison Song Mrs. 
Butler NaN \n", + " Strong Medicine Simone Fellows NaN \n", + "2007 Ghost Whisperer Jackie Boyd NaN \n", + " Entourage Herself NaN \n", + "2009 I Can Do Bad All By Myself Tanya NaN \n", + " 30 Rock Herself NaN \n", + "2010 American Idol Guest judge/Herself NaN \n", + "2012 Rock of Ages Justice Charlier NaN \n", + "2013 Betty and Coretta Dr. Betty Shabazz NaN \n", + " The X Factor Guest judge/herself NaN \n", + " Black Nativity Platinum Fro NaN \n", + "\n", + " Notes \n", + "1995 New York Undercover Herself season 2) [music performance]\" \n", + "1998 The Jamie Foxx Show Ola Mae NaN \n", + "1999 Moesha Herself NaN \n", + "2001 Angel: One More Road to Cross Guardian Angel NaN \n", + " Prison Song Mrs. Butler NaN \n", + " Strong Medicine Simone Fellows NaN \n", + "2007 Ghost Whisperer Jackie Boyd NaN \n", + " Entourage Herself NaN \n", + "2009 I Can Do Bad All By Myself Tanya NaN \n", + " 30 Rock Herself NaN \n", + "2010 American Idol Guest judge/Herself NaN \n", + "2012 Rock of Ages Justice Charlier NaN \n", + "2013 Betty and Coretta Dr. Betty Shabazz NaN \n", + " The X Factor Guest judge/herself NaN \n", + " Black Nativity Platinum Fro NaN \n", + "\n", + "==================================================\n", + "\n", + "=== Таблица 26/20 (Индекс в train: 3257) ===\n", + "Оригинальная таблица: csv/204-csv/617.csv\n", + "Размер: 12 строк, 7 колонок\n", + "--------------------------------------------------\n", + "Оригинальная таблица:\n", + " Date Opponent# Rank# \\\n", + "0 September 18 SMU* #10 \n", + "1 September 25 at Pittsburgh* #11 \n", + "2 October 2 #17 USC* #8 \n", + "3 October 9 vs. #3 Texas* #8 \n", + "4 October 16 #6 Colorado #2 \n", + "5 October 23 at Kansas State #2 \n", + "6 October 30 Iowa State #2 \n", + "7 November 6 at Missouri #2 \n", + "8 November 13 Kansas #2 \n", + "9 November 25 #1 Nebraska #2 \n", + "10 December 4 at Oklahoma State #3 \n", + "11 January 1 vs. 
#5 Auburn* #3 \n", + "\n", + " Site TV Result Attendance \n", + "0 Oklahoma Memorial Stadium • Norman, OK NaN W 30–0 53,545 \n", + "1 Pitt Stadium • Pittsburgh, PA NaN W 55–29 34,916 \n", + "2 Oklahoma Memorial Stadium • Norman, OK NaN W 33–20 62,351 \n", + "3 Cotton Bowl • Dallas, TX (Red River Shootout) CBS W 48–27 72,032 \n", + "4 Oklahoma Memorial Stadium • Norman, OK NaN W 45–17 62,501 \n", + "5 KSU Stadium • Manhattan, KS NaN W 75–28 37,198 \n", + "6 Oklahoma Memorial Stadium • Norman, OK NaN W 39–7 60,477 \n", + "7 Memorial Stadium • Columbia, MO NaN W 20–3 55,098 \n", + "8 Oklahoma Memorial Stadium • Norman, OK ABC W 56–10 54,347 \n", + "9 Oklahoma Memorial Stadium • Norman, OK (Rivalry) ABC L 31–35 62,884 \n", + "10 Lewis Field • Stillwater, OK (Bedlam Series) NaN W 58–14 36,571 \n", + "11 Tulane Stadium • New Orleans, LA (Sugar Bowl) ABC W 40–22 84,031 \n", + "Очищенная таблица:\n", + " Date Opponent# Rank# \\\n", + "0 September 18 SMU* #10 \n", + "1 September 25 at Pittsburgh* #11 \n", + "2 October 2 #17 USC* #8 \n", + "3 October 9 vs. #3 Texas* #8 \n", + "4 October 16 #6 Colorado #2 \n", + "5 October 23 at Kansas State #2 \n", + "6 October 30 Iowa State #2 \n", + "7 November 6 at Missouri #2 \n", + "8 November 13 Kansas #2 \n", + "9 November 25 #1 Nebraska #2 \n", + "10 December 4 at Oklahoma State #3 \n", + "11 January 1 vs. 
#5 Auburn* #3 \n", + "\n", + " Site TV Result \\\n", + "0 Oklahoma Memorial Stadium • Norman, OK NaN W 30–0 \n", + "1 Pitt Stadium • Pittsburgh, PA NaN W 55–29 \n", + "2 Oklahoma Memorial Stadium • Norman, OK NaN W 33–20 \n", + "3 Cotton Bowl • Dallas, TX (Red River Shootout) CBS W 48–27 \n", + "4 Oklahoma Memorial Stadium • Norman, OK NaN W 45–17 \n", + "5 KSU Stadium • Manhattan, KS NaN W 75–28 \n", + "6 Oklahoma Memorial Stadium • Norman, OK NaN W 39–7 \n", + "7 Memorial Stadium • Columbia, MO NaN W 20–3 \n", + "8 Oklahoma Memorial Stadium • Norman, OK ABC W 56–10 \n", + "9 Oklahoma Memorial Stadium • Norman, OK (Rivalry) ABC L 31–35 \n", + "10 Lewis Field • Stillwater, OK (Bedlam Series) NaN W 58–14 \n", + "11 Tulane Stadium • New Orleans, LA (Sugar Bowl) ABC W 40–22 \n", + "\n", + " Attendance Date_datetime Result_score1 Result_score2 \n", + "0 53545 1-09-18 30 0 \n", + "1 34916 1-09-25 55 29 \n", + "2 62351 1-10-02 33 20 \n", + "3 72032 1-10-09 48 27 \n", + "4 62501 1-10-16 45 17 \n", + "5 37198 1-10-23 75 28 \n", + "6 60477 1-10-30 39 7 \n", + "7 55098 1-11-06 20 3 \n", + "8 54347 1-11-13 56 10 \n", + "9 62884 1-11-25 31 35 \n", + "10 36571 1-12-04 58 14 \n", + "11 84031 1-01-01 40 22 \n", + "\n", + "==================================================\n", + "\n", + "=== Таблица 27/20 (Индекс в train: 11731) ===\n", + "Оригинальная таблица: csv/204-csv/100.csv\n", + "Размер: 13 строк, 5 колонок\n", + "--------------------------------------------------\n", + "Оригинальная таблица:\n", + " Date Name of ship Nationality Tonnage \\\n", + "0 5 June 1940 SS Stancor United Kingdom 798 \n", + "1 7 June 1940 SS Frances Massey United Kingdom 4,212 \n", + "2 7 June 1940 SS Eros United Kingdom 5,888 \n", + "3 11 June 1940 SS Violando N Goulandris Greece 2,375 \n", + "4 19 June 1940 MV Tudor Norway 6,607 \n", + "5 19 June 1940 SS Baron Loudoun United Kingdom 3,164 \n", + "6 19 June 1940 SS British Monarch United Kingdom 5,661 \n", + "7 20 June 1940 MV Moerdrecht 
Netherlands 7,493 \n", + "8 16 August 1940 SS Hedrun Sweden 2,325 \n", + "9 19 August 1940 SS Ville de Gand Belgium 7,590 \n", + "10 24 August 1940 SS La Brea United Kingdom 6,666 \n", + "11 25 August 1940 SS Empire Merlin United Kingdom 5,763 \n", + "12 25 August 1940 MV Athelcrest United Kingdom 6,825 \n", + "\n", + " Fate \n", + "0 Sunk at 58°48′N 08°45′W / 58.800°N 8.750°W \n", + "1 Sunk at 55°33′N 08°26′W / 55.550°N 8.433°W \n", + "2 Damaged at 55°33′N 08°26′W / 55.550°N 8.433°W \n", + "3 Sunk at 44°04′N 12°30′W / 44.067°N 12.500°W \n", + "4 Sunk at 45°10′N 11°50′W / 45.167°N 11.833°W \n", + "5 Sunk at 45°00′N 11°21′W / 45.000°N 11.350°W \n", + "6 Sunk at 45°00′N 11°21′W / 45.000°N 11.350°W \n", + "7 Sunk at 43°34′N 14°20′W / 43.567°N 14.333°W \n", + "8 Sunk at 57°10′N 16°37′W / 57.167°N 16.617°W \n", + "9 Sunk at 55°28′N 15°10′W / 55.467°N 15.167°W \n", + "10 Sunk at 57°24′N 11°21′W / 57.400°N 11.350°W \n", + "11 Sunk at 58°30′N 10°15′W / 58.500°N 10.250°W \n", + "12 Sunk at 58°24′N 11°25′W / 58.400°N 11.417°W \n", + "Очищенная таблица:\n", + " Date Name of ship Nationality Tonnage \\\n", + "0 5 June 1940 SS Stancor United Kingdom 798 \n", + "1 7 June 1940 SS Frances Massey United Kingdom 4212 \n", + "2 7 June 1940 SS Eros United Kingdom 5888 \n", + "3 11 June 1940 SS Violando N Goulandris Greece 2375 \n", + "4 19 June 1940 MV Tudor Norway 6607 \n", + "5 19 June 1940 SS Baron Loudoun United Kingdom 3164 \n", + "6 19 June 1940 SS British Monarch United Kingdom 5661 \n", + "7 20 June 1940 MV Moerdrecht Netherlands 7493 \n", + "8 16 August 1940 SS Hedrun Sweden 2325 \n", + "9 19 August 1940 SS Ville de Gand Belgium 7590 \n", + "10 24 August 1940 SS La Brea United Kingdom 6666 \n", + "11 25 August 1940 SS Empire Merlin United Kingdom 5763 \n", + "12 25 August 1940 MV Athelcrest United Kingdom 6825 \n", + "\n", + " Fate Date_datetime \n", + "0 Sunk at 58°48′N 08°45′W / 58.800°N 8.750°W 1940-06-05 \n", + "1 Sunk at 55°33′N 08°26′W / 55.550°N 8.433°W 1940-06-07 \n", 
+ "2 Damaged at 55°33′N 08°26′W / 55.550°N 8.433°W 1940-06-07 \n", + "3 Sunk at 44°04′N 12°30′W / 44.067°N 12.500°W 1940-06-11 \n", + "4 Sunk at 45°10′N 11°50′W / 45.167°N 11.833°W 1940-06-19 \n", + "5 Sunk at 45°00′N 11°21′W / 45.000°N 11.350°W 1940-06-19 \n", + "6 Sunk at 45°00′N 11°21′W / 45.000°N 11.350°W 1940-06-19 \n", + "7 Sunk at 43°34′N 14°20′W / 43.567°N 14.333°W 1940-06-20 \n", + "8 Sunk at 57°10′N 16°37′W / 57.167°N 16.617°W 1940-08-16 \n", + "9 Sunk at 55°28′N 15°10′W / 55.467°N 15.167°W 1940-08-19 \n", + "10 Sunk at 57°24′N 11°21′W / 57.400°N 11.350°W 1940-08-24 \n", + "11 Sunk at 58°30′N 10°15′W / 58.500°N 10.250°W 1940-08-25 \n", + "12 Sunk at 58°24′N 11°25′W / 58.400°N 11.417°W 1940-08-25 \n", + "\n", + "==================================================\n", + "\n", + "=== Таблица 28/20 (Индекс в train: 10647) ===\n", + "Оригинальная таблица: csv/204-csv/96.csv\n", + "Размер: 12 строк, 5 колонок\n", + "--------------------------------------------------\n", + "Оригинальная таблица:\n", + " Week Date Opponent Result Attendance\n", + "0 1 September 28, 1952 at Dallas Texans W 24–6 17,499\n", + "1 2 October 4, 1952 at Philadelphia Eagles W 31–7 18,648\n", + "2 3 October 12, 1952 at Cleveland Browns W 17–9 51,858\n", + "3 4 October 19, 1952 Chicago Cardinals L 24–23 41,182\n", + "4 5 October 26, 1952 Philadelphia Eagles L 14–10 21,458\n", + "5 6 November 2, 1952 at Chicago Cardinals W 28–6 27,195\n", + "6 7 November 9, 1952 San Francisco 49ers W 23–14 54,230\n", + "7 8 November 16, 1952 Green Bay Packers L 17–3 26,723\n", + "8 9 November 23, 1952 at Washington Redskins W 14–10 21,125\n", + "9 10 November 30, 1952 at Pittsburgh Steelers L 63–7 15,140\n", + "10 11 December 7, 1952 Washington Redskins L 27–17 21,237\n", + "11 12 December 14, 1952 Cleveland Browns W 37–34 41,610\n", + "Очищенная таблица:\n", + " Week Date Opponent Result Attendance \\\n", + "0 1 September 28, 1952 at Dallas Texans W 24–6 17499 \n", + "1 2 October 4, 1952 at Philadelphia 
Eagles W 31–7 18648 \n", + "2 3 October 12, 1952 at Cleveland Browns W 17–9 51858 \n", + "3 4 October 19, 1952 Chicago Cardinals L 24–23 41182 \n", + "4 5 October 26, 1952 Philadelphia Eagles L 14–10 21458 \n", + "5 6 November 2, 1952 at Chicago Cardinals W 28–6 27195 \n", + "6 7 November 9, 1952 San Francisco 49ers W 23–14 54230 \n", + "7 8 November 16, 1952 Green Bay Packers L 17–3 26723 \n", + "8 9 November 23, 1952 at Washington Redskins W 14–10 21125 \n", + "9 10 November 30, 1952 at Pittsburgh Steelers L 63–7 15140 \n", + "10 11 December 7, 1952 Washington Redskins L 27–17 21237 \n", + "11 12 December 14, 1952 Cleveland Browns W 37–34 41610 \n", + "\n", + " Date_datetime Result_score1 Result_score2 \n", + "0 1952-09-28 24 6 \n", + "1 1952-10-04 31 7 \n", + "2 1952-10-12 17 9 \n", + "3 1952-10-19 24 23 \n", + "4 1952-10-26 14 10 \n", + "5 1952-11-02 28 6 \n", + "6 1952-11-09 23 14 \n", + "7 1952-11-16 17 3 \n", + "8 1952-11-23 14 10 \n", + "9 1952-11-30 63 7 \n", + "10 1952-12-07 27 17 \n", + "11 1952-12-14 37 34 \n", + "\n", + "==================================================\n", + "\n", + "=== Таблица 29/20 (Индекс в train: 11490) ===\n", + "Оригинальная таблица: csv/204-csv/621.csv\n", + "Размер: 10 строк, 6 колонок\n", + "--------------------------------------------------\n", + "Оригинальная таблица:\n", + " No. Date Tournament Winning score \\\n", + "0 1 May 27, 1956 Dallas Centennial Open −12 (64-67-70-67=268) \n", + "1 2 Feb 21, 1960 Tucson Open Invitational −9 (67-67-68-69=271) \n", + "2 3 Jul 9, 1961 St. 
Paul Open Invitational −19 (66-71-68-64=269) \n", + "3 4 Feb 17, 1963 Tucson Open Invitational −22 (65-67-69-65=266) \n", + "4 5 Aug 28, 1966 Philadelphia Golf Classic −10 (69-69-68-71=278) \n", + "5 6 Jul 24, 1967 PGA Championship −7 (71-72-70-68=281) \n", + "6 7 Apr 21, 1968 Tournament of Champions −8 (70-68-69-69=276) \n", + "7 8 Mar 23, 1970 Greater Jacksonville Open −9 (68-75-70-66=279) \n", + "8 9 Oct 19, 1975 San Antonio Texas Open −13 (71-67-71-66=275) \n", + "9 10 Apr 18, 1976 MONY Tournament of Champions −11 (71-68-69-69=277) \n", + "\n", + " Margin\\nof victory Runner(s)-up \n", + "0 1 stroke Dow Finsterwald, Doug Ford \n", + "1 3 strokes Bob Harris \n", + "2 1 stroke Buster Cupit \n", + "3 11 strokes Gene Littler, Phil Rodgers \n", + "4 1 stroke Jack Nicklaus \n", + "5 Playoff Don Massengale \n", + "6 1 stroke Julius Boros \n", + "7 Playoff Dale Douglass \n", + "8 Playoff Larry Hinson \n", + "9 5 strokes Hubert Green \n", + "Очищенная таблица:\n", + " No. Date Tournament Winning score \\\n", + "0 1 May 27, 1956 Dallas Centennial Open −12 (64-67-70-67=268) \n", + "1 2 Feb 21, 1960 Tucson Open Invitational −9 (67-67-68-69=271) \n", + "2 3 Jul 9, 1961 St. 
Paul Open Invitational −19 (66-71-68-64=269) \n", + "3 4 Feb 17, 1963 Tucson Open Invitational −22 (65-67-69-65=266) \n", + "4 5 Aug 28, 1966 Philadelphia Golf Classic −10 (69-69-68-71=278) \n", + "5 6 Jul 24, 1967 PGA Championship −7 (71-72-70-68=281) \n", + "6 7 Apr 21, 1968 Tournament of Champions −8 (70-68-69-69=276) \n", + "7 8 Mar 23, 1970 Greater Jacksonville Open −9 (68-75-70-66=279) \n", + "8 9 Oct 19, 1975 San Antonio Texas Open −13 (71-67-71-66=275) \n", + "9 10 Apr 18, 1976 MONY Tournament of Champions −11 (71-68-69-69=277) \n", + "\n", + " Margin of victory Runner(s)-up Winning score_base \\\n", + "0 1 stroke Dow Finsterwald, Doug Ford −12 \n", + "1 3 strokes Bob Harris −9 \n", + "2 1 stroke Buster Cupit −19 \n", + "3 11 strokes Gene Littler, Phil Rodgers −22 \n", + "4 1 stroke Jack Nicklaus −10 \n", + "5 Playoff Don Massengale −7 \n", + "6 1 stroke Julius Boros −8 \n", + "7 Playoff Dale Douglass −9 \n", + "8 Playoff Larry Hinson −13 \n", + "9 5 strokes Hubert Green −11 \n", + "\n", + " Winning score_meta \n", + "0 64-67-70-67=268 \n", + "1 67-67-68-69=271 \n", + "2 66-71-68-64=269 \n", + "3 65-67-69-65=266 \n", + "4 69-69-68-71=278 \n", + "5 71-72-70-68=281 \n", + "6 70-68-69-69=276 \n", + "7 68-75-70-66=279 \n", + "8 71-67-71-66=275 \n", + "9 71-68-69-69=277 \n", + "\n", + "==================================================\n", + "\n", + "=== Таблица 30/20 (Индекс в train: 8928) ===\n", + "Оригинальная таблица: csv/203-csv/550.csv\n", + "Размер: 158 строк, 5 колонок\n", + "--------------------------------------------------\n", + "Оригинальная таблица:\n", + " Name Location County Order \\\n", + "0 Abingdon Abbey Abingdon Oxfordshire Benedictine \n", + "1 Bardney Abbey Bardney Lincolnshire Benedictine \n", + "2 Bardsey Abbey Aberdaron Gwynedd Canon Regular \n", + "3 Barking Abbey Barking Greater London Benedictine \n", + "4 Basingwerk Abbey Holywell Flintshire Cistercian \n", + ".. ... ... ... ... 
\n", + "153 Whitland Abbey Llanboidy Carmarthenshire Cistercian \n", + "154 Woburn Abbey Woburn Bedfordshire Cistercian \n", + "155 Woodspring Priory Kewstoke Somerset Augustinian \n", + "156 Worcester Priory Worcester Worcestershire Benedictine \n", + "157 Yeaveley Preceptory Yeaveley Derbyshire Knights Hospitaller \n", + "\n", + " Year dissolved \n", + "0 1538 \n", + "1 1538 \n", + "2 1537 \n", + "3 1539 \n", + "4 1536 \n", + ".. ... \n", + "153 1539 \n", + "154 1538 \n", + "155 1536 \n", + "156 1540 \n", + "157 1540 \n", + "\n", + "[158 rows x 5 columns]\n", + "Очищенная таблица:\n", + " Name Location County Order \\\n", + "0 Abingdon Abbey Abingdon Oxfordshire Benedictine \n", + "1 Bardney Abbey Bardney Lincolnshire Benedictine \n", + "2 Bardsey Abbey Aberdaron Gwynedd Canon Regular \n", + "3 Barking Abbey Barking Greater London Benedictine \n", + "4 Basingwerk Abbey Holywell Flintshire Cistercian \n", + ".. ... ... ... ... \n", + "153 Whitland Abbey Llanboidy Carmarthenshire Cistercian \n", + "154 Woburn Abbey Woburn Bedfordshire Cistercian \n", + "155 Woodspring Priory Kewstoke Somerset Augustinian \n", + "156 Worcester Priory Worcester Worcestershire Benedictine \n", + "157 Yeaveley Preceptory Yeaveley Derbyshire Knights Hospitaller \n", + "\n", + " Year dissolved \n", + "0 1538.0 \n", + "1 1538.0 \n", + "2 1537.0 \n", + "3 1539.0 \n", + "4 1536.0 \n", + ".. ... 
\n", + "153 1539.0 \n", + "154 1538.0 \n", + "155 1536.0 \n", + "156 1540.0 \n", + "157 1540.0 \n", + "\n", + "[158 rows x 5 columns]\n", + "\n", + "==================================================\n", + "\n", + "=== Таблица 31/20 (Индекс в train: 6873) ===\n", + "Оригинальная таблица: csv/204-csv/862.csv\n", + "Размер: 41 строк, 8 колонок\n", + "--------------------------------------------------\n", + "Оригинальная таблица:\n", + " Rank Bib Athlete Country Run 1 Run 2 \\\n", + "0 NaN 1 Felix Loch Germany 48.133 (1) 48.242 (1) \n", + "1 NaN 4 Andi Langenhan Germany 48.316 (2) 48.434 (4) \n", + "2 NaN 7 Johannes Ludwig Germany 48.338 (3) 48.437 (5) \n", + "3 4.0 2 David Möller Germany 48.424 (4) 48.362 (3) \n", + "4 5.0 16 Samuel Edney Canada 48.446 (5) 48.350 (2) \n", + "5 6.0 12 Chris Mazdzer United States 48.581 (8) 48.543 (6) \n", + "6 7.0 3 Albert Demtschenko Russia 48.578 (7) 48.552 (7) \n", + "7 8.0 5 Wolfgang Kindl Austria 48.573 (6) 48.596 (10) \n", + "8 9.0 10 Inars Kivlenieks Latvia 48.622 (10) 48.559 (8) \n", + "9 10.0 13 Daniel Pfister Austria 48.602 (9) 48.654 (13) \n", + "10 11.0 18 Jo Alexander Koppang Norway 48.695 (13) 48.608 (11) \n", + "11 12.0 19 Mārtiņš Rubenis Latvia 48.675 (11) 48.653 (12) \n", + "12 13.0 14 Gregory Carigiet Switzerland 48.699 (14) 48.663 (14) \n", + "13 14.0 8 Viktor Kneyb Russia 48.692 (12) 48.752 (18) \n", + "14 15.0 20 Jozef Ninis Slovakia 48.757 (15) 48.692 (15) \n", + "15 16.0 17 Dominik Fischnaller Italy 48.918 (23) 48.576 (9) \n", + "16 17.0 21 Taylor Morris United States 48.763 (16) 48.780 (20) \n", + "17 18.0 9 David Mair Italy 48.793 (17) 48.771 (19) \n", + "18 19.0 25 Ondřej Hyman Czech Republic 48.894 (21) 48.709 (16) \n", + "19 20.0 15 Manuel Pfister Austria 48.889 (20) 48.780 (20) \n", + "20 21.0 11 Reinhard Egger Austria 48.965 (25) 48.712 (17) \n", + "21 22.0 23 Thor Haug Norbech Norway 48.869 (18) 48.855 (22) \n", + "22 23.0 24 Adam Rosen Great Britain 48.878 (19) 48.884 (23) \n", + "23 24.0 28 John 
Fennell Canada 48.915 (22) 48.968 (24) \n", + "24 25.0 30 Mitchel Malyk Canada 48.935 (24) 49.005 (25) \n", + "25 26.0 22 Kristaps Maurins Latvia 48.966 (26) NaN \n", + "26 27.0 29 Aleksandr Peretjagin Russia 48.995 (27) NaN \n", + "27 28.0 27 Bruno Banani Tonga 49.026 (28) NaN \n", + "28 29.0 34 Andriy Mandziy Ukraine 49.075 (29) NaN \n", + "29 30.0 26 Maciej Kurowski Poland 49.099 (30) NaN \n", + "30 31.0 31 Tonnes Stang Rolfsen Norway 49.120 (31) NaN \n", + "31 32.0 32 Valentin Cretu Romania 49.201 (32) NaN \n", + "32 32.0 33 Andriy Kis Ukraine 49.201 (32) NaN \n", + "33 34.0 35 Danej Navrboc Slovenia 49.307 (34) NaN \n", + "34 35.0 36 Hidenari Kanayama Japan 49.584 (35) NaN \n", + "35 36.0 37 Pavel Angelov Bulgaria 49.968 (36) NaN \n", + "36 37.0 38 Tilen Sirse Slovenia 50.001 (37) NaN \n", + "37 38.0 39 Kim Dong-Hyeon South Korea 50.375 (38) NaN \n", + "38 39.0 40 Stanislav Benyov Bulgaria 50.590 (39) NaN \n", + "39 40.0 6 Semen Pavlichenko Russia 51.791 (40) NaN \n", + "40 41.0 41 Imre Pulai Hungary 52.512 (41) NaN \n", + "\n", + " Total Behind \n", + "0 1:36.375 NaN \n", + "1 1:36.750 0.375 \n", + "2 1:36.775 0.400 \n", + "3 1:36.786 0.411 \n", + "4 1:36.796 0.421 \n", + "5 1:37.124 0.749 \n", + "6 1:37.130 0.755 \n", + "7 1:37.169 0.794 \n", + "8 1:37.181 0.806 \n", + "9 1:37.256 0.881 \n", + "10 1:37.303 0.928 \n", + "11 1:37.328 0.953 \n", + "12 1:37.362 0.987 \n", + "13 1:37.444 1.069 \n", + "14 1:37.449 1.074 \n", + "15 1:37.494 1.119 \n", + "16 1:37.543 1.168 \n", + "17 1:37.564 1.189 \n", + "18 1:37.603 1.228 \n", + "19 1:37.669 1.294 \n", + "20 1:37.677 1.302 \n", + "21 1:37.724 1.349 \n", + "22 1:37.762 1.387 \n", + "23 1:37.883 1.508 \n", + "24 1:37.940 1.565 \n", + "25 48.966 NaN \n", + "26 48.995 NaN \n", + "27 49.026 NaN \n", + "28 49.075 NaN \n", + "29 49.099 NaN \n", + "30 49.120 NaN \n", + "31 49.201 NaN \n", + "32 49.201 NaN \n", + "33 49.307 NaN \n", + "34 49.584 NaN \n", + "35 49.968 NaN \n", + "36 50.001 NaN \n", + "37 50.375 NaN \n", + 
"38 50.590 NaN \n", + "39 51.791 NaN \n", + "40 52.512 NaN \n", + "Очищенная таблица:\n", + " Rank Bib Athlete Country Run 1 Run 2 \\\n", + "0 NaN 1 Felix Loch Germany 48.133 (1) 48.242 (1) \n", + "1 NaN 4 Andi Langenhan Germany 48.316 (2) 48.434 (4) \n", + "2 NaN 7 Johannes Ludwig Germany 48.338 (3) 48.437 (5) \n", + "3 4.0 2 David Möller Germany 48.424 (4) 48.362 (3) \n", + "4 5.0 16 Samuel Edney Canada 48.446 (5) 48.350 (2) \n", + "5 6.0 12 Chris Mazdzer United States 48.581 (8) 48.543 (6) \n", + "6 7.0 3 Albert Demtschenko Russia 48.578 (7) 48.552 (7) \n", + "7 8.0 5 Wolfgang Kindl Austria 48.573 (6) 48.596 (10) \n", + "8 9.0 10 Inars Kivlenieks Latvia 48.622 (10) 48.559 (8) \n", + "9 10.0 13 Daniel Pfister Austria 48.602 (9) 48.654 (13) \n", + "10 11.0 18 Jo Alexander Koppang Norway 48.695 (13) 48.608 (11) \n", + "11 12.0 19 Mārtiņš Rubenis Latvia 48.675 (11) 48.653 (12) \n", + "12 13.0 14 Gregory Carigiet Switzerland 48.699 (14) 48.663 (14) \n", + "13 14.0 8 Viktor Kneyb Russia 48.692 (12) 48.752 (18) \n", + "14 15.0 20 Jozef Ninis Slovakia 48.757 (15) 48.692 (15) \n", + "15 16.0 17 Dominik Fischnaller Italy 48.918 (23) 48.576 (9) \n", + "16 17.0 21 Taylor Morris United States 48.763 (16) 48.780 (20) \n", + "17 18.0 9 David Mair Italy 48.793 (17) 48.771 (19) \n", + "18 19.0 25 Ondřej Hyman Czech Republic 48.894 (21) 48.709 (16) \n", + "19 20.0 15 Manuel Pfister Austria 48.889 (20) 48.780 (20) \n", + "20 21.0 11 Reinhard Egger Austria 48.965 (25) 48.712 (17) \n", + "21 22.0 23 Thor Haug Norbech Norway 48.869 (18) 48.855 (22) \n", + "22 23.0 24 Adam Rosen Great Britain 48.878 (19) 48.884 (23) \n", + "23 24.0 28 John Fennell Canada 48.915 (22) 48.968 (24) \n", + "24 25.0 30 Mitchel Malyk Canada 48.935 (24) 49.005 (25) \n", + "25 26.0 22 Kristaps Maurins Latvia 48.966 (26) NaN \n", + "26 27.0 29 Aleksandr Peretjagin Russia 48.995 (27) NaN \n", + "27 28.0 27 Bruno Banani Tonga 49.026 (28) NaN \n", + "28 29.0 34 Andriy Mandziy Ukraine 49.075 (29) NaN \n", + "29 
30.0 26 Maciej Kurowski Poland 49.099 (30) NaN \n", + "30 31.0 31 Tonnes Stang Rolfsen Norway 49.120 (31) NaN \n", + "31 32.0 32 Valentin Cretu Romania 49.201 (32) NaN \n", + "32 32.0 33 Andriy Kis Ukraine 49.201 (32) NaN \n", + "33 34.0 35 Danej Navrboc Slovenia 49.307 (34) NaN \n", + "34 35.0 36 Hidenari Kanayama Japan 49.584 (35) NaN \n", + "35 36.0 37 Pavel Angelov Bulgaria 49.968 (36) NaN \n", + "36 37.0 38 Tilen Sirse Slovenia 50.001 (37) NaN \n", + "37 38.0 39 Kim Dong-Hyeon South Korea 50.375 (38) NaN \n", + "38 39.0 40 Stanislav Benyov Bulgaria 50.590 (39) NaN \n", + "39 40.0 6 Semen Pavlichenko Russia 51.791 (40) NaN \n", + "40 41.0 41 Imre Pulai Hungary 52.512 (41) NaN \n", + "\n", + " Total Behind Run 1_base Run 1_meta Run 2_base Run 2_meta \\\n", + "0 1:36.375 NaN 48.133 1 48.242 1 \n", + "1 1:36.750 0.375 48.316 2 48.434 4 \n", + "2 1:36.775 0.400 48.338 3 48.437 5 \n", + "3 1:36.786 0.411 48.424 4 48.362 3 \n", + "4 1:36.796 0.421 48.446 5 48.350 2 \n", + "5 1:37.124 0.749 48.581 8 48.543 6 \n", + "6 1:37.130 0.755 48.578 7 48.552 7 \n", + "7 1:37.169 0.794 48.573 6 48.596 10 \n", + "8 1:37.181 0.806 48.622 10 48.559 8 \n", + "9 1:37.256 0.881 48.602 9 48.654 13 \n", + "10 1:37.303 0.928 48.695 13 48.608 11 \n", + "11 1:37.328 0.953 48.675 11 48.653 12 \n", + "12 1:37.362 0.987 48.699 14 48.663 14 \n", + "13 1:37.444 1.069 48.692 12 48.752 18 \n", + "14 1:37.449 1.074 48.757 15 48.692 15 \n", + "15 1:37.494 1.119 48.918 23 48.576 9 \n", + "16 1:37.543 1.168 48.763 16 48.780 20 \n", + "17 1:37.564 1.189 48.793 17 48.771 19 \n", + "18 1:37.603 1.228 48.894 21 48.709 16 \n", + "19 1:37.669 1.294 48.889 20 48.780 20 \n", + "20 1:37.677 1.302 48.965 25 48.712 17 \n", + "21 1:37.724 1.349 48.869 18 48.855 22 \n", + "22 1:37.762 1.387 48.878 19 48.884 23 \n", + "23 1:37.883 1.508 48.915 22 48.968 24 \n", + "24 1:37.940 1.565 48.935 24 49.005 25 \n", + "25 48.966 NaN 48.966 26 NaN NaN \n", + "26 48.995 NaN 48.995 27 NaN NaN \n", + "27 49.026 NaN 49.026 28 
NaN NaN \n", + "28 49.075 NaN 49.075 29 NaN NaN \n", + "29 49.099 NaN 49.099 30 NaN NaN \n", + "30 49.120 NaN 49.120 31 NaN NaN \n", + "31 49.201 NaN 49.201 32 NaN NaN \n", + "32 49.201 NaN 49.201 32 NaN NaN \n", + "33 49.307 NaN 49.307 34 NaN NaN \n", + "34 49.584 NaN 49.584 35 NaN NaN \n", + "35 49.968 NaN 49.968 36 NaN NaN \n", + "36 50.001 NaN 50.001 37 NaN NaN \n", + "37 50.375 NaN 50.375 38 NaN NaN \n", + "38 50.590 NaN 50.590 39 NaN NaN \n", + "39 51.791 NaN 51.791 40 NaN NaN \n", + "40 52.512 NaN 52.512 41 NaN NaN \n", + "\n", + " Total_score1 Total_score2 \n", + "0 1.0 36.0 \n", + "1 1.0 36.0 \n", + "2 1.0 36.0 \n", + "3 1.0 36.0 \n", + "4 1.0 36.0 \n", + "5 1.0 37.0 \n", + "6 1.0 37.0 \n", + "7 1.0 37.0 \n", + "8 1.0 37.0 \n", + "9 1.0 37.0 \n", + "10 1.0 37.0 \n", + "11 1.0 37.0 \n", + "12 1.0 37.0 \n", + "13 1.0 37.0 \n", + "14 1.0 37.0 \n", + "15 1.0 37.0 \n", + "16 1.0 37.0 \n", + "17 1.0 37.0 \n", + "18 1.0 37.0 \n", + "19 1.0 37.0 \n", + "20 1.0 37.0 \n", + "21 1.0 37.0 \n", + "22 1.0 37.0 \n", + "23 1.0 37.0 \n", + "24 1.0 37.0 \n", + "25 NaN NaN \n", + "26 NaN NaN \n", + "27 NaN NaN \n", + "28 NaN NaN \n", + "29 NaN NaN \n", + "30 NaN NaN \n", + "31 NaN NaN \n", + "32 NaN NaN \n", + "33 NaN NaN \n", + "34 NaN NaN \n", + "35 NaN NaN \n", + "36 NaN NaN \n", + "37 NaN NaN \n", + "38 NaN NaN \n", + "39 NaN NaN \n", + "40 NaN NaN \n", + "\n", + "==================================================\n", + "\n", + "=== Таблица 32/20 (Индекс в train: 3611) ===\n", + "Оригинальная таблица: csv/203-csv/282.csv\n", + "Размер: 8 строк, 12 колонок\n", + "--------------------------------------------------\n", + "Оригинальная таблица:\n", + " Date Racecourse Distance Race Status \\\n", + "0 April 10, 2004 Kempton Park 8f Listed \n", + "1 April 24, 2004 Sandown Park 8f Group 2 \n", + "2 May 22, 2004 Curragh 8f Group 2 \n", + "3 June 15, 2004 Royal Ascot 8f Group 1 \n", + "4 July 6, 2004 Newmarket 8f Group 1 \n", + "5 July 28, 2004 Goodwood 8f Group 1 \n", + "6 
September 11, 2004 Leopardstown 8f Group 1 \n", + "7 September 25, 2004 Ascot 8f Group 1 \n", + "\n", + " Race Position Winning Distance (lengths) \\\n", + "0 Snowdrop Fillies Stakes 2nd to Beneventa - \n", + "1 Betfred Mile 3rd to Hurricane Alan - \n", + "2 Ridgewood Pearl Stakes 1st 6 \n", + "3 Queen Anne Stakes 2nd to Refuse to Bend - \n", + "4 Falmouth Stakes 1st 2½ \n", + "5 Sussex Stakes 1st nk \n", + "6 Matron Stakes 1st ½ \n", + "7 QEII Stakes 6th to Rakti - \n", + "\n", + " Jockey Rating Going Odds Prize Money \n", + "0 Johnny Murtagh 114 Good to Soft 4/11F £6,600 \n", + "1 Johnny Murtagh 114 Good to Soft 5/2F £11,000 \n", + "2 Johnny Murtagh - Good to Firm 11/10F £56,760.56 \n", + "3 Johnny Murtagh 114 Good to Firm 6/1 £55,000 \n", + "4 Johnny Murtagh 114 Good to Firm 11/4 £116,000 \n", + "5 Johnny Murtagh 115 Good 3/1 £174,000 \n", + "6 Johnny Murtagh - Good to Firm 8/13F £119,014.08 \n", + "7 Johnny Murtagh 120 Good to Firm 5/2F £3,750 \n", + "Очищенная таблица:\n", + " Date Racecourse Distance Race Status \\\n", + "0 April 10, 2004 Kempton Park 8f Listed \n", + "1 April 24, 2004 Sandown Park 8f Group 2 \n", + "2 May 22, 2004 Curragh 8f Group 2 \n", + "3 June 15, 2004 Royal Ascot 8f Group 1 \n", + "4 July 6, 2004 Newmarket 8f Group 1 \n", + "5 July 28, 2004 Goodwood 8f Group 1 \n", + "6 September 11, 2004 Leopardstown 8f Group 1 \n", + "7 September 25, 2004 Ascot 8f Group 1 \n", + "\n", + " Race Position Winning Distance (lengths) \\\n", + "0 Snowdrop Fillies Stakes 2nd to Beneventa - \n", + "1 Betfred Mile 3rd to Hurricane Alan - \n", + "2 Ridgewood Pearl Stakes 1st 6 \n", + "3 Queen Anne Stakes 2nd to Refuse to Bend - \n", + "4 Falmouth Stakes 1st 2½ \n", + "5 Sussex Stakes 1st nk \n", + "6 Matron Stakes 1st ½ \n", + "7 QEII Stakes 6th to Rakti - \n", + "\n", + " Jockey Rating Going Odds Prize Money Date_datetime \n", + "0 Johnny Murtagh 114 Good to Soft 4/11F £6,600 2004-04-10 \n", + "1 Johnny Murtagh 114 Good to Soft 5/2F £11,000 2004-04-24 \n", + 
"2 Johnny Murtagh - Good to Firm 11/10F £56,760.56 2004-05-22 \n", + "3 Johnny Murtagh 114 Good to Firm 6/1 £55,000 2004-06-15 \n", + "4 Johnny Murtagh 114 Good to Firm 11/4 £116,000 2004-07-06 \n", + "5 Johnny Murtagh 115 Good 3/1 £174,000 2004-07-28 \n", + "6 Johnny Murtagh - Good to Firm 8/13F £119,014.08 2004-09-11 \n", + "7 Johnny Murtagh 120 Good to Firm 5/2F £3,750 2004-09-25 \n", + "\n", + "==================================================\n", + "\n", + "=== Таблица 33/20 (Индекс в train: 7359) ===\n", + "Оригинальная таблица: csv/203-csv/385.csv\n", + "Размер: 11 строк, 8 колонок\n", + "--------------------------------------------------\n", + "Оригинальная таблица:\n", + " Material Formula \\\n", + "0 Diamond C \n", + "1 Glasses Silica with Pb, Al, &/or Tl \n", + "2 White Sapphire Al2O3 \n", + "3 Spinel MgO·Al2O3 \n", + "4 Rutile TiO2 \n", + "5 Strontium titanate SrTiO3 \n", + "6 YAG Y3Al5O12 \n", + "7 GGG Gd3Ga5O12 \n", + "8 Cubic Zirconia ZrO2(+ rare earths) \n", + "9 Moissanite SiC \n", + "10 Quartz Silica \n", + "\n", + " Refractive\\nindex(es)\\n589.3 nm Dispersion\\n431 – 687 nm \\\n", + "0 2.417 0.044 \n", + "1 ~ 1.6 > 0.020 \n", + "2 1.762 – 1.770 0.018 \n", + "3 1.727 0.020 \n", + "4 2.62 – 2.9 0.33 \n", + "5 2.41 0.19 \n", + "6 1.83 0.028 \n", + "7 1.97 0.045 \n", + "8 ~ 2.2 ~ 0.06 \n", + "9 2.648 – 2.691 0.104 \n", + "10 1.543 – 1.554 NaN \n", + "\n", + " Hardness\\n(Mohs'\\nscale) Density\\n(g/cm3) Thermal\\nCond. 
State of\\nthe art \n", + "0 10 3.52 Excellent (Natural) \n", + "1 < 6 2.4 – 4.2 Poor 1700 – \n", + "2 9 3.97 Poor 1900–1947 \n", + "3 8 ~ 3.6 Poor 1920–1947 \n", + "4 ~ 6 4.25 Poor 1947–1955 \n", + "5 5.5 5.13 Poor 1955–1970 \n", + "6 8.25 4.55 – 4.65 Poor 1970–1975 \n", + "7 7 7.02 Poor 1973–1975 \n", + "8 ~ 8.3 ~ 5.7 Poor 1976 – \n", + "9 8.5–9.25 3.2 High 1998 – \n", + "10 7- 2.50 – 2.65 NaN Ancient \n", + "Очищенная таблица:\n", + " Material Formula \\\n", + "0 Diamond C \n", + "1 Glasses Silica with Pb, Al, &/or Tl \n", + "2 White Sapphire Al2O3 \n", + "3 Spinel MgO·Al2O3 \n", + "4 Rutile TiO2 \n", + "5 Strontium titanate SrTiO3 \n", + "6 YAG Y3Al5O12 \n", + "7 GGG Gd3Ga5O12 \n", + "8 Cubic Zirconia ZrO2(+ rare earths) \n", + "9 Moissanite SiC \n", + "10 Quartz Silica \n", + "\n", + " Refractive index(es) 589.3 nm Dispersion 431 – 687 nm \\\n", + "0 2.417 0.044 \n", + "1 ~ 1.6 > 0.020 \n", + "2 1.762 – 1.770 0.018 \n", + "3 1.727 0.020 \n", + "4 2.62 – 2.9 0.33 \n", + "5 2.41 0.19 \n", + "6 1.83 0.028 \n", + "7 1.97 0.045 \n", + "8 ~ 2.2 ~ 0.06 \n", + "9 2.648 – 2.691 0.104 \n", + "10 1.543 – 1.554 NaN \n", + "\n", + " Hardness (Mohs' scale) Density (g/cm3) Thermal Cond. 
State of the art \\\n", + "0 10 3.52 Excellent (Natural) \n", + "1 < 6 2.4 – 4.2 Poor 1700 – \n", + "2 9 3.97 Poor 1900–1947 \n", + "3 8 ~ 3.6 Poor 1920–1947 \n", + "4 ~ 6 4.25 Poor 1947–1955 \n", + "5 5.5 5.13 Poor 1955–1970 \n", + "6 8.25 4.55 – 4.65 Poor 1970–1975 \n", + "7 7 7.02 Poor 1973–1975 \n", + "8 ~ 8.3 ~ 5.7 Poor 1976 – \n", + "9 8.5–9.25 3.2 High 1998 – \n", + "10 7- 2.50 – 2.65 NaN Ancient \n", + "\n", + " State of the art_score1 State of the art_score2 \n", + "0 NaN NaN \n", + "1 NaN NaN \n", + "2 1900.0 1947.0 \n", + "3 1920.0 1947.0 \n", + "4 1947.0 1955.0 \n", + "5 1955.0 1970.0 \n", + "6 1970.0 1975.0 \n", + "7 1973.0 1975.0 \n", + "8 NaN NaN \n", + "9 NaN NaN \n", + "10 NaN NaN \n", + "\n", + "==================================================\n", + "\n", + "=== Таблица 34/20 (Индекс в train: 9654) ===\n", + "Оригинальная таблица: csv/204-csv/739.csv\n", + "Размер: 43 строк, 7 колонок\n", + "--------------------------------------------------\n", + "Оригинальная таблица:\n", + " Districts UBIGEO Area\\n(km²) Population \\\n", + "0 Ancón 150102 299.22 29,419 \n", + "1 Ate 150103 77.72 419,663 \n", + "2 Barranco 150104 3.33 45,922 \n", + "3 Breña 150105 3.22 94,808 \n", + "4 Carabayllo 150106 346.88 188,764 \n", + "5 Chaclacayo 150107 39.50 39,686 \n", + "6 Chorrillos 150108 38.94 262,595 \n", + "7 Cieneguilla 150109 240.33 15,784 \n", + "8 Comas 150110 48.75 464,745 \n", + "9 El Agustino 150111 12.54 165,425 \n", + "10 Independencia 150112 14.56 197,308 \n", + "11 Jesús María 150113 4.57 58,588 \n", + "12 La Molina 150114 65.75 124,468 \n", + "13 La Victoria 150115 8.74 190,218 \n", + "14 Lima 150101 21.88 278,804 \n", + "15 Lince 150116 3.03 52,123 \n", + "16 Los Olivos 150117 18.25 286,549 \n", + "17 Lurigancho 150118 236.47 90,594 \n", + "18 Lurín 150119 181.12 55,953 \n", + "19 Magdalena del Mar 150120 3.61 48,445 \n", + "20 Miraflores 150122 9.62 92,815 \n", + "21 Pachacamac 150123 160.23 54,763 \n", + "22 Pucusana 150124 37.83 9,231 \n", + 
"23 Pueblo Libre 150121 4.38 71,892 \n", + "24 Puente Piedra 150125 71.18 203,473 \n", + "25 Punta Hermosa 150126 119.50 4,676 \n", + "26 Punta Negra 150127 130.50 4,473 \n", + "27 Rímac 150128 11.87 175,793 \n", + "28 San Bartolo 150129 45.01 5,733 \n", + "29 San Borja 150130 9.96 102,762 \n", + "30 San Isidro 150131 11.10 55,309 \n", + "31 San Juan de Lurigancho 150132 131.25 812,656 \n", + "32 San Juan de Miraflores 150133 23.98 335,237 \n", + "33 San Luis 150134 3.49 46,258 \n", + "34 San Martín de Porres 150135 36.91 525,155 \n", + "35 San Miguel 150136 10.72 124,904 \n", + "36 Santa Anita 150137 10.69 160,777 \n", + "37 Santa María del Mar District 150138 9.81 88 \n", + "38 Santa Rosa 150139 21.50 9,379 \n", + "39 Santiago de Surco 150140 34.75 272,690 \n", + "40 Surquillo 150141 3.46 84,202 \n", + "41 Villa El Salvador 150142 35.46 367,436 \n", + "42 Villa María del Triunfo 150143 70.57 355,761 \n", + "\n", + " Population density\\n(/km²) Created Postal\\ncode \n", + "0 98.3 29 October 1874 2 \n", + "1 5,399.7 2 January 1857 3 \n", + "2 13,790.4 26 October 1874 4 \n", + "3 29,443.5 15 July 1949 5 \n", + "4 544.2 4 August 1821 6 \n", + "5 1,004.7 24 April 1940 8 \n", + "6 6,743.6 2 January 1857 9 \n", + "7 65.7 3 March 1970 40 \n", + "8 9,533.2 12 December 1961 7 \n", + "9 13,191.8 6 January 1965 10 \n", + "10 13,551.4 16 March 1964 28 \n", + "11 12,820.1 13 December 1963 11 \n", + "12 1,893.0 6 February 1962 12 \n", + "13 21,764.1 16 August 1921 13 \n", + "14 13,187.2 2 January 1857 1 \n", + "15 17,202.3 20 May 1936 14 \n", + "16 15,701.3 7 April 1989 39 \n", + "17 383.1 2 January 1857 15 \n", + "18 308.9 2 January 1857 16 \n", + "19 13,419.7 10 May 1920 17 \n", + "20 9,648.1 2 January 1857 18 \n", + "21 341.8 2 January 1857 19 \n", + "22 244.0 22 January 1943 20 \n", + "23 16,413.7 2 January 1857 21 \n", + "24 2,858.6 14 February 1927 22 \n", + "25 39.1 7 April 1954 24 \n", + "26 34.3 7 April 1954 23 \n", + "27 14,809.9 16 August 1921 25 \n", + "28 127.4 5 
May 1946 26 \n", + "29 10,317.5 1 June 1983 41 \n", + "30 6,165.6 24 April 1931 27 \n", + "31 6,191.7 13 January 1967 36 \n", + "32 13,979.9 12 January 1965 29 \n", + "33 13,254.4 30 May 1968 30 \n", + "34 14,228.0 22 May 1950 31 \n", + "35 11,651.5 10 May 1920 32 \n", + "36 15,039.9 26 October 1989 43 \n", + "37 9.0 16 January 1962 37 \n", + "38 436.2 7 February 1962 38 \n", + "39 7,847.2 16 December 1929 33 \n", + "40 24,335.8 15 July 1949 34 \n", + "41 10,362.0 1 June 1983 42 \n", + "42 5,041.2 28 December 1961 35 \n", + "Очищенная таблица:\n", + " Districts UBIGEO Area (km²) Population \\\n", + "0 Ancón 150102 299.22 29419 \n", + "1 Ate 150103 77.72 419663 \n", + "2 Barranco 150104 3.33 45922 \n", + "3 Breña 150105 3.22 94808 \n", + "4 Carabayllo 150106 346.88 188764 \n", + "5 Chaclacayo 150107 39.50 39686 \n", + "6 Chorrillos 150108 38.94 262595 \n", + "7 Cieneguilla 150109 240.33 15784 \n", + "8 Comas 150110 48.75 464745 \n", + "9 El Agustino 150111 12.54 165425 \n", + "10 Independencia 150112 14.56 197308 \n", + "11 Jesús María 150113 4.57 58588 \n", + "12 La Molina 150114 65.75 124468 \n", + "13 La Victoria 150115 8.74 190218 \n", + "14 Lima 150101 21.88 278804 \n", + "15 Lince 150116 3.03 52123 \n", + "16 Los Olivos 150117 18.25 286549 \n", + "17 Lurigancho 150118 236.47 90594 \n", + "18 Lurín 150119 181.12 55953 \n", + "19 Magdalena del Mar 150120 3.61 48445 \n", + "20 Miraflores 150122 9.62 92815 \n", + "21 Pachacamac 150123 160.23 54763 \n", + "22 Pucusana 150124 37.83 9231 \n", + "23 Pueblo Libre 150121 4.38 71892 \n", + "24 Puente Piedra 150125 71.18 203473 \n", + "25 Punta Hermosa 150126 119.50 4676 \n", + "26 Punta Negra 150127 130.50 4473 \n", + "27 Rímac 150128 11.87 175793 \n", + "28 San Bartolo 150129 45.01 5733 \n", + "29 San Borja 150130 9.96 102762 \n", + "30 San Isidro 150131 11.10 55309 \n", + "31 San Juan de Lurigancho 150132 131.25 812656 \n", + "32 San Juan de Miraflores 150133 23.98 335237 \n", + "33 San Luis 150134 3.49 46258 \n", + 
"34 San Martín de Porres 150135 36.91 525155 \n", + "35 San Miguel 150136 10.72 124904 \n", + "36 Santa Anita 150137 10.69 160777 \n", + "37 Santa María del Mar District 150138 9.81 88 \n", + "38 Santa Rosa 150139 21.50 9379 \n", + "39 Santiago de Surco 150140 34.75 272690 \n", + "40 Surquillo 150141 3.46 84202 \n", + "41 Villa El Salvador 150142 35.46 367436 \n", + "42 Villa María del Triunfo 150143 70.57 355761 \n", + "\n", + " Population density (/km²) Created Postal code \n", + "0 98.3 29 October 1874 2 \n", + "1 5399.7 2 January 1857 3 \n", + "2 13790.4 26 October 1874 4 \n", + "3 29443.5 15 July 1949 5 \n", + "4 544.2 4 August 1821 6 \n", + "5 1004.7 24 April 1940 8 \n", + "6 6743.6 2 January 1857 9 \n", + "7 65.7 3 March 1970 40 \n", + "8 9533.2 12 December 1961 7 \n", + "9 13191.8 6 January 1965 10 \n", + "10 13551.4 16 March 1964 28 \n", + "11 12820.1 13 December 1963 11 \n", + "12 1893.0 6 February 1962 12 \n", + "13 21764.1 16 August 1921 13 \n", + "14 13187.2 2 January 1857 1 \n", + "15 17202.3 20 May 1936 14 \n", + "16 15701.3 7 April 1989 39 \n", + "17 383.1 2 January 1857 15 \n", + "18 308.9 2 January 1857 16 \n", + "19 13419.7 10 May 1920 17 \n", + "20 9648.1 2 January 1857 18 \n", + "21 341.8 2 January 1857 19 \n", + "22 244.0 22 January 1943 20 \n", + "23 16413.7 2 January 1857 21 \n", + "24 2858.6 14 February 1927 22 \n", + "25 39.1 7 April 1954 24 \n", + "26 34.3 7 April 1954 23 \n", + "27 14809.9 16 August 1921 25 \n", + "28 127.4 5 May 1946 26 \n", + "29 10317.5 1 June 1983 41 \n", + "30 6165.6 24 April 1931 27 \n", + "31 6191.7 13 January 1967 36 \n", + "32 13979.9 12 January 1965 29 \n", + "33 13254.4 30 May 1968 30 \n", + "34 14228.0 22 May 1950 31 \n", + "35 11651.5 10 May 1920 32 \n", + "36 15039.9 26 October 1989 43 \n", + "37 9.0 16 January 1962 37 \n", + "38 436.2 7 February 1962 38 \n", + "39 7847.2 16 December 1929 33 \n", + "40 24335.8 15 July 1949 34 \n", + "41 10362.0 1 June 1983 42 \n", + "42 5041.2 28 December 1961 35 \n", + 
"\n", + "==================================================\n", + "\n", + "=== Таблица 35/20 (Индекс в train: 4557) ===\n", + "Оригинальная таблица: csv/204-csv/776.csv\n", + "Размер: 59 строк, 5 колонок\n", + "--------------------------------------------------\n", + "Оригинальная таблица:\n", + " Site \\\n", + "0 Atwater Library of the Mechanics' Institute of... \n", + "1 Bank of Montreal \n", + "2 Battle of Rivière des Prairies / Battle of Cou... \n", + "3 Battle of the Lake of Two Mountains \n", + "4 Black Watch (Royal Highland Regiment) of Canad... \n", + "5 Bonsecours Market \n", + "6 Château De Ramezay / India House \n", + "7 Christ Church Cathedral \n", + "8 Church of Notre-Dame-de-la-Défense \n", + "9 Church of Saint-Léon-de-Westmount \n", + "10 Erskine and American United Church \n", + "11 Former Montreal Custom House \n", + "12 George Stephen House / Mount Stephen Club \n", + "13 Grey Nuns' Hospital \n", + "14 H. Vincent Meredith Residence \n", + "15 Hersey Pavilion \n", + "16 Hochelaga \n", + "17 Lachine Canal \n", + "18 Lachine Canal Manufacturing Complex \n", + "19 Last Post Fund National Field of Honour \n", + "20 LeBer-LeMoyne House \n", + "21 Louis-Joseph Papineau \n", + "22 Maison Cartier \n", + "23 Maison Saint-Gabriel \n", + "24 Marie-Reine-du-Monde Cathedral \n", + "25 Marlborough Apartments \n", + "26 Masonic Memorial Temple \n", + "27 Merchants Textile Mill \n", + "28 Model City of Mount Royal \n", + "29 Monklands / Villa Maria Convent \n", + "30 Montreal Botanical Garden \n", + "31 Montreal City Hall \n", + "32 Montreal Forum \n", + "33 Montreal's Birthplace \n", + "34 Monument National \n", + "35 Mother House of the Grey Nuns of Montreal \n", + "36 Mount Royal Cemetery \n", + "37 Notre-Dame Roman Catholic Church / Basilica \n", + "38 Notre-Dame-des-Neiges Cemetery \n", + "39 Outremont Theatre \n", + "40 Pavillon Mailloux \n", + "41 Rialto Theatre \n", + "42 Sainte-Anne-de-Bellevue Canal \n", + "43 Saint Joseph's Oratory of Mount Royal \n", + 
"44 Saint-Sulpice Seminary and its Gardens \n", + "45 Senneville Historic District \n", + "46 Sir George-Étienne Cartier \n", + "47 St. George Antiochian Orthodox Church \n", + "48 St. George's Anglican Church \n", + "49 St. James United Church \n", + "50 St. Patrick's Basilica \n", + "51 Sulpician Towers / Fort de la Montagne \n", + "52 The Fur Trade at Lachine \n", + "53 The Main \n", + "54 Trafalgar Lodge \n", + "55 Van Horne / Shaughnessy House \n", + "56 Westmount District \n", + "57 Wilson Chambers \n", + "58 Windsor Station (Canadian Pacific) \n", + "\n", + " Date(s) Designated \\\n", + "0 1920 (completed) 2005 \n", + "1 1894 (completed) 1990 \n", + "2 1690 (battle) 1924 \n", + "3 1689 (battle) 1925 \n", + "4 1906 (completed) 2008 \n", + "5 1847 (completed) 1984 \n", + "6 1705 (completed), 1756 (rebuilt after fire) 1949 \n", + "7 1860 (completed) 1999 \n", + "8 1919 (completed) 2002 \n", + "9 1903 (completed) 1997 \n", + "10 1894 (completed) 1998 \n", + "11 1838 (completed) 1997 \n", + "12 1881 (completed) 1971 \n", + "13 1765 (completed) 1973 \n", + "14 1897 (completed) 1990 \n", + "15 1905 (completed) 1997 \n", + "16 1300s (ca.) (first construction of fortified v... 
1920 \n", + "17 1825 (completed) 1929 \n", + "18 1825 (completed) 1996 \n", + "19 1930 (established) 2007 \n", + "20 1671 (completed) 2002 \n", + "21 1785 (completed) 1968 \n", + "22 1813 (completed) 1982 \n", + "23 1668 (completed) 2007 \n", + "24 1894 (consecrated) 1999 \n", + "25 1900 (completed) 1990 \n", + "26 1930 (completed) 2001 \n", + "27 1882 (established) 1989 \n", + "28 1914 (district plan) 2008 \n", + "29 1804 (completed) 1951 \n", + "30 1931 (established) 2008 \n", + "31 1878 (completed), 1922 (rebuilt) 1984 \n", + "32 1924 (completed) 1997 \n", + "33 1642 (event) 1924 \n", + "34 1893 (completed) 1985 \n", + "35 1871 (completed) 2011 \n", + "36 1852 (established) 1999 \n", + "37 1829 (completed) 1989 \n", + "38 1854 (established) 1999 \n", + "39 1929 (completed) 1993 \n", + "40 1931 (completed) 1997 \n", + "41 1924 (completed) 1993 \n", + "42 1843 (completed) 1929 \n", + "43 1904 (established), 1967 (basilica completed) 2003 \n", + "44 1687 (completed) 1980 \n", + "45 1860 (established) 2002 \n", + "46 1838 (completed) 1964 \n", + "47 1940 (completed) 1999 \n", + "48 1870 (completed) 1990 \n", + "49 1888 (completed) 1996 \n", + "50 1847 (completed) 1990 \n", + "51 1694 (completed) 1970 \n", + "52 1803 (completed) 1970 \n", + "53 NaN 1996 \n", + "54 1848 (completed) 1990 \n", + "55 1848 (completed) 1973 \n", + "56 1874 (city incorporated) 2012 \n", + "57 1868 (completed) 1990 \n", + "58 1889 (completed) 1975 \n", + "\n", + " Location \\\n", + "0 Montreal\\n45°29′19.17″N 73°35′3.41″W / 45.48... \n", + "1 Montreal\\n45°29′15.81″N 73°34′7.45″W / 45.48... \n", + "2 Montreal\\n45°41′56.07″N 73°30′13.68″W / 45.6... \n", + "3 Senneville\\n45°26′52.69″N 73°56′25.41″W / 45... \n", + "4 Montreal\\n45°30′28.37″N 73°34′11.38″W / 45.5... \n", + "5 Montreal\\n45°30′32.21″N 73°33′5.18″W / 45.50... \n", + "6 Montreal\\n45°30′31.54″N 73°33′11.28″W / 45.5... \n", + "7 Montreal\\n45°30′13.21″N 73°34′12.04″W / 45.5... 
\n", + "8 Montreal\\n45°32′5.82″N 73°36′41.28″W / 45.53... \n", + "9 Westmount\\n45°29′7.58″N 73°35′30.75″W / 45.4... \n", + "10 Montreal\\n45°29′56.98″N 73°34′47.4″W / 45.49... \n", + "11 Montreal\\n45°30′11.98″N 73°33′16.5″W / 45.50... \n", + "12 Montreal\\n45°29′56.73″N 73°34′32.93″W / 45.4... \n", + "13 Montreal\\n45°30′1.31″N 73°33′17.2″W / 45.500... \n", + "14 Montreal\\n45°30′15.11″N 73°34′54.7″W / 45.50... \n", + "15 Montreal\\n45°30′30.94″N 73°34′50.01″W / 45.5... \n", + "16 Montreal\\n45°30′12.83″N 73°34′30.58″W / 45.5... \n", + "17 Montreal\\n45°27′30″N 73°36′42″W / 45.45833°N... \n", + "18 Montreal\\n45°27′30″N 73°36′42″W / 45.45833°N... \n", + "19 Pointe-Claire\\n45°26′38.80″N 73°50′15.58″W / ... \n", + "20 Montreal\\n45°25′48″N 73°39′59″W / 45.43000°N... \n", + "21 Montreal\\n45°30′35.77″N 73°33′7.88″W / 45.50... \n", + "22 Montreal\\n45°30′28.56″N 73°33′9.18″W / 45.50... \n", + "23 Montreal\\n45°28′33.37″N 73°33′21.58″W / 45.4... \n", + "24 Montreal\\n45°29′57.86″N 73°34′7.36″W / 45.49... \n", + "25 Montreal\\n45°30′25.07″N 73°34′32.6″W / 45.50... \n", + "26 Montreal\\n45°29′40.92″N 73°34′58.85″W / 45.4... \n", + "27 Montreal\\n45°28′32″N 73°34′48″W / 45.47556°N... \n", + "28 Mount Royal\\n45°30′58″N 73°38′35″W / 45.5161... \n", + "29 Montreal\\n45°28′54.53″N 73°37′1.6″W / 45.481... \n", + "30 Montreal\\n45°33′26.00″N 73°33′24.50″W / 45.5... \n", + "31 Montreal\\n45°30′31.84″N 73°33′14.45″W / 45.5... \n", + "32 Montreal\\n45°29′25″N 73°35′5″W / 45.49028°N ... \n", + "33 Montreal\\n45°30′12.27″N 73°33′14.31″W / 45.5... \n", + "34 Montreal\\n45°30′32.76″N 73°33′45″W / 45.5091... \n", + "35 Montreal\\n45°29′37″N 73°34′36″W / 45.49361°N... \n", + "36 Montreal\\n45°30′32.76″N 73°33′45″W / 45.5091... \n", + "37 Montreal\\n45°30′16.15″N 73°33′22.55″W / 45.5... \n", + "38 Montreal\\n45°30′6.55″N 73°36′23.48″W / 45.50... \n", + "39 Montreal\\n45°30′6.55″N 73°36′23.48″W / 45.50... \n", + "40 Montreal\\n45°31′31.54″N 73°33′51.26″W / 45.5... 
\n", + "41 Montreal\\n45°31′24.91″N 73°36′17.14″W / 45.5... \n", + "42 Sainte-Anne-de-Bellevue\\n45°24′13″N 73°57′16″W... \n", + "43 Montreal\\n45°29′30″N 73°37′0″W / 45.49167°N ... \n", + "44 Montreal\\n45°30′14″N 73°33′25″W / 45.50389°N... \n", + "45 Senneville\\n45°25′50.1″N 73°57′8.2″W / 45.43... \n", + "46 Montreal\\n45°30′40.12″N 73°33′5.84″W / 45.51... \n", + "47 Montreal\\n45°32′23.5″N 73°36′51.07″W / 45.53... \n", + "48 Montreal\\n45°32′23.5″N 73°36′51.07″W / 45.53... \n", + "49 Montreal\\n45°30′18.97″N 73°34′6.56″W / 45.50... \n", + "50 Montreal\\n45°30′12.82″N 73°33′53.31″W / 45.5... \n", + "51 Montreal\\n45°29′37.68″N 73°35′4.56″W / 45.49... \n", + "52 Montreal\\n45°25′53.04″N 73°40′32.16″W / 45.4... \n", + "53 Montreal\\n45°30′36.58″N 73°33′51.93″W / 45.5... \n", + "54 Westmount\\n45°29′42.36″N 73°35′53.39″W / 45.... \n", + "55 Montreal\\n45°29′42.36″N 73°35′53.39″W / 45.4... \n", + "56 Westmount \n", + "57 Montreal\\n45°30′3.25″N 73°33′35.06″W / 45.50... \n", + "58 Montreal\\n45°29′50.86″N 73°34′7.18″W / 45.49... \n", + "\n", + " Description \n", + "0 The home of the first Mechanics' Institute in ... \n", + "1 A three-and-a-half storey sandstone former ban... \n", + "2 The site of a battle between a group of Iroquo... \n", + "3 A skirmish at Lake of Two Mountains between 28... \n", + "4 Home to The Black Watch (Royal Highland Regime... \n", + "5 A monumental, domed masonry civic building tha... \n", + "6 A stone mansion built for Claude de Ramezay, G... \n", + "7 An excellent example of a Gothic Revival-style... \n", + "8 A Romanesque Revival style church in Montreal'... \n", + "9 One of the best examples of mural decoration d... \n", + "10 An excellent example of a large Romanesque Rev... \n", + "11 An excellent example of Palladian architecture... \n", + "12 A large, stone Victorian mansion that is the b... \n", + "13 A three-and-a-half building that is an example... \n", + "14 Representative of the mansions built by Montre... 
\n", + "15 One of the first purpose-built nurses' residen... \n", + "16 A grass-covered area about 79 square metres (8... \n", + "17 An early 19th-century canal, 14 kilometres (8.... \n", + "18 An important manufacturing and industrial comp... \n", + "19 A military cemetery for veterans who died in a... \n", + "20 A 17th-century fieldstone former fur trading p... \n", + "21 A two-and-a-half storey stone house that was t... \n", + "22 Two two-and-a-half storey stone attached house... \n", + "23 A fieldstone house that was home to the sister... \n", + "24 A Baroque Revival cathedral that, when built, ... \n", + "25 A four-storey, red brick apartment building th... \n", + "26 A monumental masonic temple resembling a Greek... \n", + "27 The second largest textile mill in Canada for ... \n", + "28 A historic residential suburb developed in acc... \n", + "29 A two-storey stone Neo-Palladian mansion that ... \n", + "30 A 75-hectare (190-acre) botanical garden; its ... \n", + "31 A five-storey stone building and one of the be... \n", + "32 One of Canada's most famous sporting venues; a... \n", + "33 The location where Paul Chomedey de Maisonneuv... \n", + "34 A four-storey theatre and cultural centre cons... \n", + "35 The former motherhouse of the Grey Nuns, now p... \n", + "36 A 67-hectare (170-acre) cemetery located on th... \n", + "37 An immense stone church built in the Romantic ... \n", + "38 The largest cemetery in Canada and an outstand... \n", + "39 An cinema with an Art Deco exterior and a comb... \n", + "40 A five-storey brick nurses’ residence on the c... \n", + "41 A movie palace and an exceptional example of B... \n", + "42 A canal constructed by the Board of Works of t... \n", + "43 A large Roman Catholic pilgrimage site located... \n", + "44 A religious seminary with garden; a rare and r... \n", + "45 A 565-hectare (1,400-acre) historic district t... \n", + "46 Two houses that together served as the residen... 
\n", + "47 A predominantly Byzantine-style church, it is ... \n", + "48 An Anglican church in downtown Montreal that i... \n", + "49 A large stone church in the High Victorian Got... \n", + "50 A noted example of French Gothic Revival archi... \n", + "51 Two 13-metre (43 ft) towers that were once bas... \n", + "52 A single-storey stone warehouse located in an ... \n", + "53 A 6-kilometre (3.7 mi) long stretch of Saint L... \n", + "54 An asymmetrical one-and-a-half storey brick vi... \n", + "55 The Second Empire-style mansion of Thomas Shau... \n", + "56 The historic district of Westmount which epito... \n", + "57 A four-and-a-half-storey stone Gothic Revival ... \n", + "58 A railway terminal and Canadian Pacific Railwa... \n", + "Очищенная таблица:\n", + " Site \\\n", + "0 Atwater Library of the Mechanics' Institute of... \n", + "1 Bank of Montreal \n", + "2 Battle of Rivière des Prairies / Battle of Cou... \n", + "3 Battle of the Lake of Two Mountains \n", + "4 Black Watch (Royal Highland Regiment) of Canad... \n", + "5 Bonsecours Market \n", + "6 Château De Ramezay / India House \n", + "7 Christ Church Cathedral \n", + "8 Church of Notre-Dame-de-la-Défense \n", + "9 Church of Saint-Léon-de-Westmount \n", + "10 Erskine and American United Church \n", + "11 Former Montreal Custom House \n", + "12 George Stephen House / Mount Stephen Club \n", + "13 Grey Nuns' Hospital \n", + "14 H. 
Vincent Meredith Residence \n", + "15 Hersey Pavilion \n", + "16 Hochelaga \n", + "17 Lachine Canal \n", + "18 Lachine Canal Manufacturing Complex \n", + "19 Last Post Fund National Field of Honour \n", + "20 LeBer-LeMoyne House \n", + "21 Louis-Joseph Papineau \n", + "22 Maison Cartier \n", + "23 Maison Saint-Gabriel \n", + "24 Marie-Reine-du-Monde Cathedral \n", + "25 Marlborough Apartments \n", + "26 Masonic Memorial Temple \n", + "27 Merchants Textile Mill \n", + "28 Model City of Mount Royal \n", + "29 Monklands / Villa Maria Convent \n", + "30 Montreal Botanical Garden \n", + "31 Montreal City Hall \n", + "32 Montreal Forum \n", + "33 Montreal's Birthplace \n", + "34 Monument National \n", + "35 Mother House of the Grey Nuns of Montreal \n", + "36 Mount Royal Cemetery \n", + "37 Notre-Dame Roman Catholic Church / Basilica \n", + "38 Notre-Dame-des-Neiges Cemetery \n", + "39 Outremont Theatre \n", + "40 Pavillon Mailloux \n", + "41 Rialto Theatre \n", + "42 Sainte-Anne-de-Bellevue Canal \n", + "43 Saint Joseph's Oratory of Mount Royal \n", + "44 Saint-Sulpice Seminary and its Gardens \n", + "45 Senneville Historic District \n", + "46 Sir George-Étienne Cartier \n", + "47 St. George Antiochian Orthodox Church \n", + "48 St. George's Anglican Church \n", + "49 St. James United Church \n", + "50 St. 
Patrick's Basilica \n", + "51 Sulpician Towers / Fort de la Montagne \n", + "52 The Fur Trade at Lachine \n", + "53 The Main \n", + "54 Trafalgar Lodge \n", + "55 Van Horne / Shaughnessy House \n", + "56 Westmount District \n", + "57 Wilson Chambers \n", + "58 Windsor Station (Canadian Pacific) \n", + "\n", + " Date(s) Designated \\\n", + "0 1920 (completed) 2005 \n", + "1 1894 (completed) 1990 \n", + "2 1690 (battle) 1924 \n", + "3 1689 (battle) 1925 \n", + "4 1906 (completed) 2008 \n", + "5 1847 (completed) 1984 \n", + "6 1705 (completed), 1756 (rebuilt after fire) 1949 \n", + "7 1860 (completed) 1999 \n", + "8 1919 (completed) 2002 \n", + "9 1903 (completed) 1997 \n", + "10 1894 (completed) 1998 \n", + "11 1838 (completed) 1997 \n", + "12 1881 (completed) 1971 \n", + "13 1765 (completed) 1973 \n", + "14 1897 (completed) 1990 \n", + "15 1905 (completed) 1997 \n", + "16 1300s (ca.) (first construction of fortified v... 1920 \n", + "17 1825 (completed) 1929 \n", + "18 1825 (completed) 1996 \n", + "19 1930 (established) 2007 \n", + "20 1671 (completed) 2002 \n", + "21 1785 (completed) 1968 \n", + "22 1813 (completed) 1982 \n", + "23 1668 (completed) 2007 \n", + "24 1894 (consecrated) 1999 \n", + "25 1900 (completed) 1990 \n", + "26 1930 (completed) 2001 \n", + "27 1882 (established) 1989 \n", + "28 1914 (district plan) 2008 \n", + "29 1804 (completed) 1951 \n", + "30 1931 (established) 2008 \n", + "31 1878 (completed), 1922 (rebuilt) 1984 \n", + "32 1924 (completed) 1997 \n", + "33 1642 (event) 1924 \n", + "34 1893 (completed) 1985 \n", + "35 1871 (completed) 2011 \n", + "36 1852 (established) 1999 \n", + "37 1829 (completed) 1989 \n", + "38 1854 (established) 1999 \n", + "39 1929 (completed) 1993 \n", + "40 1931 (completed) 1997 \n", + "41 1924 (completed) 1993 \n", + "42 1843 (completed) 1929 \n", + "43 1904 (established), 1967 (basilica completed) 2003 \n", + "44 1687 (completed) 1980 \n", + "45 1860 (established) 2002 \n", + "46 1838 (completed) 1964 \n", + "47 
1940 (completed) 1999 \n", + "48 1870 (completed) 1990 \n", + "49 1888 (completed) 1996 \n", + "50 1847 (completed) 1990 \n", + "51 1694 (completed) 1970 \n", + "52 1803 (completed) 1970 \n", + "53 NaN 1996 \n", + "54 1848 (completed) 1990 \n", + "55 1848 (completed) 1973 \n", + "56 1874 (city incorporated) 2012 \n", + "57 1868 (completed) 1990 \n", + "58 1889 (completed) 1975 \n", + "\n", + " Location \\\n", + "0 Montreal\\n45°29′19.17″N 73°35′3.41″W / 45.48... \n", + "1 Montreal\\n45°29′15.81″N 73°34′7.45″W / 45.48... \n", + "2 Montreal\\n45°41′56.07″N 73°30′13.68″W / 45.6... \n", + "3 Senneville\\n45°26′52.69″N 73°56′25.41″W / 45... \n", + "4 Montreal\\n45°30′28.37″N 73°34′11.38″W / 45.5... \n", + "5 Montreal\\n45°30′32.21″N 73°33′5.18″W / 45.50... \n", + "6 Montreal\\n45°30′31.54″N 73°33′11.28″W / 45.5... \n", + "7 Montreal\\n45°30′13.21″N 73°34′12.04″W / 45.5... \n", + "8 Montreal\\n45°32′5.82″N 73°36′41.28″W / 45.53... \n", + "9 Westmount\\n45°29′7.58″N 73°35′30.75″W / 45.4... \n", + "10 Montreal\\n45°29′56.98″N 73°34′47.4″W / 45.49... \n", + "11 Montreal\\n45°30′11.98″N 73°33′16.5″W / 45.50... \n", + "12 Montreal\\n45°29′56.73″N 73°34′32.93″W / 45.4... \n", + "13 Montreal\\n45°30′1.31″N 73°33′17.2″W / 45.500... \n", + "14 Montreal\\n45°30′15.11″N 73°34′54.7″W / 45.50... \n", + "15 Montreal\\n45°30′30.94″N 73°34′50.01″W / 45.5... \n", + "16 Montreal\\n45°30′12.83″N 73°34′30.58″W / 45.5... \n", + "17 Montreal\\n45°27′30″N 73°36′42″W / 45.45833°N... \n", + "18 Montreal\\n45°27′30″N 73°36′42″W / 45.45833°N... \n", + "19 Pointe-Claire\\n45°26′38.80″N 73°50′15.58″W / ... \n", + "20 Montreal\\n45°25′48″N 73°39′59″W / 45.43000°N... \n", + "21 Montreal\\n45°30′35.77″N 73°33′7.88″W / 45.50... \n", + "22 Montreal\\n45°30′28.56″N 73°33′9.18″W / 45.50... \n", + "23 Montreal\\n45°28′33.37″N 73°33′21.58″W / 45.4... \n", + "24 Montreal\\n45°29′57.86″N 73°34′7.36″W / 45.49... \n", + "25 Montreal\\n45°30′25.07″N 73°34′32.6″W / 45.50... 
\n", + "26 Montreal\\n45°29′40.92″N 73°34′58.85″W / 45.4... \n", + "27 Montreal\\n45°28′32″N 73°34′48″W / 45.47556°N... \n", + "28 Mount Royal\\n45°30′58″N 73°38′35″W / 45.5161... \n", + "29 Montreal\\n45°28′54.53″N 73°37′1.6″W / 45.481... \n", + "30 Montreal\\n45°33′26.00″N 73°33′24.50″W / 45.5... \n", + "31 Montreal\\n45°30′31.84″N 73°33′14.45″W / 45.5... \n", + "32 Montreal\\n45°29′25″N 73°35′5″W / 45.49028°N ... \n", + "33 Montreal\\n45°30′12.27″N 73°33′14.31″W / 45.5... \n", + "34 Montreal\\n45°30′32.76″N 73°33′45″W / 45.5091... \n", + "35 Montreal\\n45°29′37″N 73°34′36″W / 45.49361°N... \n", + "36 Montreal\\n45°30′32.76″N 73°33′45″W / 45.5091... \n", + "37 Montreal\\n45°30′16.15″N 73°33′22.55″W / 45.5... \n", + "38 Montreal\\n45°30′6.55″N 73°36′23.48″W / 45.50... \n", + "39 Montreal\\n45°30′6.55″N 73°36′23.48″W / 45.50... \n", + "40 Montreal\\n45°31′31.54″N 73°33′51.26″W / 45.5... \n", + "41 Montreal\\n45°31′24.91″N 73°36′17.14″W / 45.5... \n", + "42 Sainte-Anne-de-Bellevue\\n45°24′13″N 73°57′16″W... \n", + "43 Montreal\\n45°29′30″N 73°37′0″W / 45.49167°N ... \n", + "44 Montreal\\n45°30′14″N 73°33′25″W / 45.50389°N... \n", + "45 Senneville\\n45°25′50.1″N 73°57′8.2″W / 45.43... \n", + "46 Montreal\\n45°30′40.12″N 73°33′5.84″W / 45.51... \n", + "47 Montreal\\n45°32′23.5″N 73°36′51.07″W / 45.53... \n", + "48 Montreal\\n45°32′23.5″N 73°36′51.07″W / 45.53... \n", + "49 Montreal\\n45°30′18.97″N 73°34′6.56″W / 45.50... \n", + "50 Montreal\\n45°30′12.82″N 73°33′53.31″W / 45.5... \n", + "51 Montreal\\n45°29′37.68″N 73°35′4.56″W / 45.49... \n", + "52 Montreal\\n45°25′53.04″N 73°40′32.16″W / 45.4... \n", + "53 Montreal\\n45°30′36.58″N 73°33′51.93″W / 45.5... \n", + "54 Westmount\\n45°29′42.36″N 73°35′53.39″W / 45.... \n", + "55 Montreal\\n45°29′42.36″N 73°35′53.39″W / 45.4... \n", + "56 Westmount \n", + "57 Montreal\\n45°30′3.25″N 73°33′35.06″W / 45.50... \n", + "58 Montreal\\n45°29′50.86″N 73°34′7.18″W / 45.49... 
\n", + "\n", + " Description Date(s)_base \\\n", + "0 The home of the first Mechanics' Institute in ... 1920 \n", + "1 A three-and-a-half storey sandstone former ban... 1894 \n", + "2 The site of a battle between a group of Iroquo... 1690 \n", + "3 A skirmish at Lake of Two Mountains between 28... 1689 \n", + "4 Home to The Black Watch (Royal Highland Regime... 1906 \n", + "5 A monumental, domed masonry civic building tha... 1847 \n", + "6 A stone mansion built for Claude de Ramezay, G... 1705 \n", + "7 An excellent example of a Gothic Revival-style... 1860 \n", + "8 A Romanesque Revival style church in Montreal'... 1919 \n", + "9 One of the best examples of mural decoration d... 1903 \n", + "10 An excellent example of a large Romanesque Rev... 1894 \n", + "11 An excellent example of Palladian architecture... 1838 \n", + "12 A large, stone Victorian mansion that is the b... 1881 \n", + "13 A three-and-a-half building that is an example... 1765 \n", + "14 Representative of the mansions built by Montre... 1897 \n", + "15 One of the first purpose-built nurses' residen... 1905 \n", + "16 A grass-covered area about 79 square metres (8... 1300s \n", + "17 An early 19th-century canal, 14 kilometres (8.... 1825 \n", + "18 An important manufacturing and industrial comp... 1825 \n", + "19 A military cemetery for veterans who died in a... 1930 \n", + "20 A 17th-century fieldstone former fur trading p... 1671 \n", + "21 A two-and-a-half storey stone house that was t... 1785 \n", + "22 Two two-and-a-half storey stone attached house... 1813 \n", + "23 A fieldstone house that was home to the sister... 1668 \n", + "24 A Baroque Revival cathedral that, when built, ... 1894 \n", + "25 A four-storey, red brick apartment building th... 1900 \n", + "26 A monumental masonic temple resembling a Greek... 1930 \n", + "27 The second largest textile mill in Canada for ... 1882 \n", + "28 A historic residential suburb developed in acc... 
1914 \n", + "29 A two-storey stone Neo-Palladian mansion that ... 1804 \n", + "30 A 75-hectare (190-acre) botanical garden; its ... 1931 \n", + "31 A five-storey stone building and one of the be... 1878 \n", + "32 One of Canada's most famous sporting venues; a... 1924 \n", + "33 The location where Paul Chomedey de Maisonneuv... 1642 \n", + "34 A four-storey theatre and cultural centre cons... 1893 \n", + "35 The former motherhouse of the Grey Nuns, now p... 1871 \n", + "36 A 67-hectare (170-acre) cemetery located on th... 1852 \n", + "37 An immense stone church built in the Romantic ... 1829 \n", + "38 The largest cemetery in Canada and an outstand... 1854 \n", + "39 An cinema with an Art Deco exterior and a comb... 1929 \n", + "40 A five-storey brick nurses’ residence on the c... 1931 \n", + "41 A movie palace and an exceptional example of B... 1924 \n", + "42 A canal constructed by the Board of Works of t... 1843 \n", + "43 A large Roman Catholic pilgrimage site located... 1904 \n", + "44 A religious seminary with garden; a rare and r... 1687 \n", + "45 A 565-hectare (1,400-acre) historic district t... 1860 \n", + "46 Two houses that together served as the residen... 1838 \n", + "47 A predominantly Byzantine-style church, it is ... 1940 \n", + "48 An Anglican church in downtown Montreal that i... 1870 \n", + "49 A large stone church in the High Victorian Got... 1888 \n", + "50 A noted example of French Gothic Revival archi... 1847 \n", + "51 Two 13-metre (43 ft) towers that were once bas... 1694 \n", + "52 A single-storey stone warehouse located in an ... 1803 \n", + "53 A 6-kilometre (3.7 mi) long stretch of Saint L... NaN \n", + "54 An asymmetrical one-and-a-half storey brick vi... 1848 \n", + "55 The Second Empire-style mansion of Thomas Shau... 1848 \n", + "56 The historic district of Westmount which epito... 1874 \n", + "57 A four-and-a-half-storey stone Gothic Revival ... 1868 \n", + "58 A railway terminal and Canadian Pacific Railwa... 
1889 \n", + "\n", + " Date(s)_meta \n", + "0 completed \n", + "1 completed \n", + "2 battle \n", + "3 battle \n", + "4 completed \n", + "5 completed \n", + "6 completed \n", + "7 completed \n", + "8 completed \n", + "9 completed \n", + "10 completed \n", + "11 completed \n", + "12 completed \n", + "13 completed \n", + "14 completed \n", + "15 completed \n", + "16 ca. \n", + "17 completed \n", + "18 completed \n", + "19 established \n", + "20 completed \n", + "21 completed \n", + "22 completed \n", + "23 completed \n", + "24 consecrated \n", + "25 completed \n", + "26 completed \n", + "27 established \n", + "28 district plan \n", + "29 completed \n", + "30 established \n", + "31 completed \n", + "32 completed \n", + "33 event \n", + "34 completed \n", + "35 completed \n", + "36 established \n", + "37 completed \n", + "38 established \n", + "39 completed \n", + "40 completed \n", + "41 completed \n", + "42 completed \n", + "43 established \n", + "44 completed \n", + "45 established \n", + "46 completed \n", + "47 completed \n", + "48 completed \n", + "49 completed \n", + "50 completed \n", + "51 completed \n", + "52 completed \n", + "53 NaN \n", + "54 completed \n", + "55 completed \n", + "56 city incorporated \n", + "57 completed \n", + "58 completed \n", + "\n", + "==================================================\n", + "\n", + "=== Таблица 36/20 (Индекс в train: 13261) ===\n", + "Оригинальная таблица: csv/204-csv/808.csv\n", + "Размер: 52 строк, 6 колонок\n", + "--------------------------------------------------\n", + "Оригинальная таблица:\n", + " Team No Driver Class \\\n", + "0 Carlin Motorsport 1 James Courtney C \n", + "1 Carlin Motorsport 1 Derek Hayes C \n", + "2 Carlin Motorsport 2 Alan van der Merwe C \n", + "3 Carlin Motorsport 21 Michael Keohane C \n", + "4 Carlin Motorsport 22 Shinya Hosokawa C \n", + "5 Manor Motorsport 3 Mark Taylor C \n", + "6 Manor Motorsport 4 Richard Antinucci C \n", + "7 Manor Motorsport 34 Ronnie Bremer C \n", + "8 Fortec 
Motorsport 5 Fabio Carbone C \n", + "9 Fortec Motorsport 6 Heikki Kovalainen C \n", + "10 Alan Docking Racing 7 Robbie Kerr C \n", + "11 Alan Docking Racing 8 Mark Mayall C \n", + "12 Alan Docking Racing 13 Tor Graves C \n", + "13 Team Avanti 9 Matthew Gilmore C \n", + "14 Team Avanti 9 James Andanson C \n", + "15 Team Avanti 9 Jeroen Bleekemolen C \n", + "16 Team Avanti 10 Stefano Fabi C \n", + "17 Promatecme UK 11 Bruce Jouanny C \n", + "18 Promatecme UK 12 Ernani Judice C \n", + "19 Promatecme UK 12 Matthew Gilmore C \n", + "20 Duma Racing 14 Robert Dahlgren C \n", + "21 Duma Racing 14 John Antoniadis C \n", + "22 Menu Motorsport 16 Rob Austin C \n", + "23 Menu Motorsport 17 Giandomenico Brusatin C \n", + "24 Menu Motorsport 17 Stefan de Groot C \n", + "25 Motaworld Racing 18 Andrew Thompson C \n", + "26 Motaworld Racing 19 Tom Sisley C \n", + "27 Motaworld Racing 19 Stefan Hodgetts C \n", + "28 Fred Goddard Racing 51 Fairuz Fauzy S \n", + "29 Fred Goddard Racing 52 Jason Tahinci S \n", + "30 Fred Goddard Racing 52 Earl Goddard S \n", + "31 Performance Racing 53 Justin Sherwood S \n", + "32 Performance Racing 54 Julien Schell S \n", + "33 Meritus Racing 55 Gavin Smith S \n", + "34 Meritus Racing 56 Stephen Colbert S \n", + "35 Essential Motorsport 58 Reck Junior S \n", + "36 Essential Motorsport 58 Ricardo Teixeira S \n", + "37 Essential Motorsport 59 Jesper Carlsen S \n", + "38 Scuderia Etruria 60 Diego Romanini S \n", + "39 Sweeney Racing 61 Adam Carroll S \n", + "40 Sweeney Racing 62 Billy Asaro S \n", + "41 Sweeney Racing 63 Robert Dahlgren S \n", + "42 Team Park 64 Stephen Colbert S \n", + "43 Team Park 64 Stefan Hodgetts S \n", + "44 Team Park 64 Adam Jones S \n", + "45 Team Park 64 David Clark S \n", + "46 Team Park 65 David Clark S \n", + "47 Diamond Racing 66 Pedro Barral S \n", + "48 Diamond Racing 77 Harold Primat S \n", + "49 T-Sport 67 Clivio Piccione S \n", + "50 T-Sport 68 Karun Chandhok S \n", + "51 Hill Speed Motorsport 69 Luke Stevens S \n", + 
"\n", + " Chassis Engine \n", + "0 Dallara F302 Mugen-Honda \n", + "1 Dallara F302 Mugen-Honda \n", + "2 Dallara F302 Mugen-Honda \n", + "3 Dallara F302 Mugen-Honda \n", + "4 Dallara F302 Mugen-Honda \n", + "5 Dallara F302 Mugen-Honda \n", + "6 Dallara F302 Mugen-Honda \n", + "7 Dallara F302 Mugen-Honda \n", + "8 Dallara F302 Renault Sodemo \n", + "9 Dallara F302 Renault Sodemo \n", + "10 Dallara F302 Mugen-Honda \n", + "11 Dallara F302 Mugen-Honda \n", + "12 Dallara F302 Mugen-Honda \n", + "13 Dallara F302/Ralt F302 Mugen-Honda \n", + "14 Ralt F302 Mugen-Honda \n", + "15 Ralt F302 Mugen-Honda \n", + "16 Dallara F302 Mugen-Honda \n", + "17 Dallara F302 Mugen-Honda \n", + "18 Dallara F302 Mugen-Honda \n", + "19 Dallara F302 Mugen-Honda \n", + "20 Dallara F302 Mugen-Honda \n", + "21 Dallara F302 Mugen-Honda \n", + "22 Dallara F302 Opel Spiess \n", + "23 Dallara F302 Opel Spiess \n", + "24 Dallara F302 Opel Spiess \n", + "25 Dallara F302 Ford \n", + "26 Dallara F302 Ford \n", + "27 Dallara F302 Ford \n", + "28 Dallara F301 Renault Sodemo \n", + "29 Dallara F301 Renault Sodemo \n", + "30 Dallara F301 Renault Sodemo \n", + "31 Dallara F301 Opel Spiess \n", + "32 Dallara F301 Opel Spiess \n", + "33 Dallara F301 Mugen-Honda \n", + "34 Dallara F301 Mugen-Honda \n", + "35 Dallara F301 Toyota \n", + "36 Dallara F301 Toyota \n", + "37 Dallara F301 Toyota \n", + "38 Dallara F301 Opel Spiess \n", + "39 Dallara F301 Mugen-Honda \n", + "40 Dallara F301 Mugen-Honda \n", + "41 Dallara F301 Mugen-Honda \n", + "42 Dallara F301 Opel Spiess \n", + "43 Dallara F301 Opel Spiess \n", + "44 Dallara F301 Opel Spiess \n", + "45 Dallara F301 Opel Spiess \n", + "46 Dallara F301 Opel Spiess \n", + "47 Dallara F301 Mugen-Honda \n", + "48 Dallara F301 Mugen-Honda \n", + "49 Dallara F301 Mugen-Honda \n", + "50 Dallara F301 Mugen-Honda \n", + "51 Dallara F301 Opel Spiess \n", + "Очищенная таблица:\n", + " Team No Driver Class \\\n", + "0 Carlin Motorsport 1 James Courtney C \n", + "1 Carlin 
Motorsport 1 Derek Hayes C \n", + "2 Carlin Motorsport 2 Alan van der Merwe C \n", + "3 Carlin Motorsport 21 Michael Keohane C \n", + "4 Carlin Motorsport 22 Shinya Hosokawa C \n", + "5 Manor Motorsport 3 Mark Taylor C \n", + "6 Manor Motorsport 4 Richard Antinucci C \n", + "7 Manor Motorsport 34 Ronnie Bremer C \n", + "8 Fortec Motorsport 5 Fabio Carbone C \n", + "9 Fortec Motorsport 6 Heikki Kovalainen C \n", + "10 Alan Docking Racing 7 Robbie Kerr C \n", + "11 Alan Docking Racing 8 Mark Mayall C \n", + "12 Alan Docking Racing 13 Tor Graves C \n", + "13 Team Avanti 9 Matthew Gilmore C \n", + "14 Team Avanti 9 James Andanson C \n", + "15 Team Avanti 9 Jeroen Bleekemolen C \n", + "16 Team Avanti 10 Stefano Fabi C \n", + "17 Promatecme UK 11 Bruce Jouanny C \n", + "18 Promatecme UK 12 Ernani Judice C \n", + "19 Promatecme UK 12 Matthew Gilmore C \n", + "20 Duma Racing 14 Robert Dahlgren C \n", + "21 Duma Racing 14 John Antoniadis C \n", + "22 Menu Motorsport 16 Rob Austin C \n", + "23 Menu Motorsport 17 Giandomenico Brusatin C \n", + "24 Menu Motorsport 17 Stefan de Groot C \n", + "25 Motaworld Racing 18 Andrew Thompson C \n", + "26 Motaworld Racing 19 Tom Sisley C \n", + "27 Motaworld Racing 19 Stefan Hodgetts C \n", + "28 Fred Goddard Racing 51 Fairuz Fauzy S \n", + "29 Fred Goddard Racing 52 Jason Tahinci S \n", + "30 Fred Goddard Racing 52 Earl Goddard S \n", + "31 Performance Racing 53 Justin Sherwood S \n", + "32 Performance Racing 54 Julien Schell S \n", + "33 Meritus Racing 55 Gavin Smith S \n", + "34 Meritus Racing 56 Stephen Colbert S \n", + "35 Essential Motorsport 58 Reck Junior S \n", + "36 Essential Motorsport 58 Ricardo Teixeira S \n", + "37 Essential Motorsport 59 Jesper Carlsen S \n", + "38 Scuderia Etruria 60 Diego Romanini S \n", + "39 Sweeney Racing 61 Adam Carroll S \n", + "40 Sweeney Racing 62 Billy Asaro S \n", + "41 Sweeney Racing 63 Robert Dahlgren S \n", + "42 Team Park 64 Stephen Colbert S \n", + "43 Team Park 64 Stefan Hodgetts S \n", + 
"44 Team Park 64 Adam Jones S \n", + "45 Team Park 64 David Clark S \n", + "46 Team Park 65 David Clark S \n", + "47 Diamond Racing 66 Pedro Barral S \n", + "48 Diamond Racing 77 Harold Primat S \n", + "49 T-Sport 67 Clivio Piccione S \n", + "50 T-Sport 68 Karun Chandhok S \n", + "51 Hill Speed Motorsport 69 Luke Stevens S \n", + "\n", + " Chassis Engine \n", + "0 Dallara F302 Mugen-Honda \n", + "1 Dallara F302 Mugen-Honda \n", + "2 Dallara F302 Mugen-Honda \n", + "3 Dallara F302 Mugen-Honda \n", + "4 Dallara F302 Mugen-Honda \n", + "5 Dallara F302 Mugen-Honda \n", + "6 Dallara F302 Mugen-Honda \n", + "7 Dallara F302 Mugen-Honda \n", + "8 Dallara F302 Renault Sodemo \n", + "9 Dallara F302 Renault Sodemo \n", + "10 Dallara F302 Mugen-Honda \n", + "11 Dallara F302 Mugen-Honda \n", + "12 Dallara F302 Mugen-Honda \n", + "13 Dallara F302/Ralt F302 Mugen-Honda \n", + "14 Ralt F302 Mugen-Honda \n", + "15 Ralt F302 Mugen-Honda \n", + "16 Dallara F302 Mugen-Honda \n", + "17 Dallara F302 Mugen-Honda \n", + "18 Dallara F302 Mugen-Honda \n", + "19 Dallara F302 Mugen-Honda \n", + "20 Dallara F302 Mugen-Honda \n", + "21 Dallara F302 Mugen-Honda \n", + "22 Dallara F302 Opel Spiess \n", + "23 Dallara F302 Opel Spiess \n", + "24 Dallara F302 Opel Spiess \n", + "25 Dallara F302 Ford \n", + "26 Dallara F302 Ford \n", + "27 Dallara F302 Ford \n", + "28 Dallara F301 Renault Sodemo \n", + "29 Dallara F301 Renault Sodemo \n", + "30 Dallara F301 Renault Sodemo \n", + "31 Dallara F301 Opel Spiess \n", + "32 Dallara F301 Opel Spiess \n", + "33 Dallara F301 Mugen-Honda \n", + "34 Dallara F301 Mugen-Honda \n", + "35 Dallara F301 Toyota \n", + "36 Dallara F301 Toyota \n", + "37 Dallara F301 Toyota \n", + "38 Dallara F301 Opel Spiess \n", + "39 Dallara F301 Mugen-Honda \n", + "40 Dallara F301 Mugen-Honda \n", + "41 Dallara F301 Mugen-Honda \n", + "42 Dallara F301 Opel Spiess \n", + "43 Dallara F301 Opel Spiess \n", + "44 Dallara F301 Opel Spiess \n", + "45 Dallara F301 Opel Spiess \n", + "46 
Dallara F301 Opel Spiess \n", + "47 Dallara F301 Mugen-Honda \n", + "48 Dallara F301 Mugen-Honda \n", + "49 Dallara F301 Mugen-Honda \n", + "50 Dallara F301 Mugen-Honda \n", + "51 Dallara F301 Opel Spiess \n", + "\n", + "==================================================\n", + "\n", + "=== Таблица 37/20 (Индекс в train: 106) ===\n", + "Оригинальная таблица: csv/204-csv/575.csv\n", + "Размер: 16 строк, 9 колонок\n", + "--------------------------------------------------\n", + "Оригинальная таблица:\n", + " Place Position Number Name League One FA Cup League Cup \\\n", + "0 1 MF 4 Luke Summerfield 5 0 0 \n", + "1 2 MF 17 Paul Parry 3 0 1 \n", + "2 3 FW 24 Tom Eaves 3 0 0 \n", + "3 =4 MF 11 Jon Taylor 1 0 0 \n", + "4 =4 DF 5 Darren Jones * 2 0 0 \n", + "5 =4 MF 23 Adam Reach * 2 0 0 \n", + "6 =4 MF 22 Ryan Woods 2 0 0 \n", + "7 =4 FW 26 Curtis Main * 2 0 0 \n", + "8 =5 MF 2 Jermaine Grandison 1 0 0 \n", + "9 =5 MF 8 Dave McAllister 1 0 0 \n", + "10 =5 FW 15 Aaron Wildig 1 0 0 \n", + "11 =5 FW 29 Asa Hall 1 0 0 \n", + "12 =5 FW 7 Gozie Ugwu* 1 0 0 \n", + "13 =5 DF 18 Connor Goldson 1 0 0 \n", + "14 =5 MF 10 Nicky Wroe* 1 0 0 \n", + "15 =5 FW 9 Tom Bradshaw 1 0 0 \n", + "\n", + " Football League Trophy Total \n", + "0 0 5 \n", + "1 0 4 \n", + "2 0 3 \n", + "3 1 2 \n", + "4 0 2 \n", + "5 0 2 \n", + "6 0 2 \n", + "7 0 2 \n", + "8 0 1 \n", + "9 0 1 \n", + "10 0 1 \n", + "11 0 1 \n", + "12 0 1 \n", + "13 0 1 \n", + "14 0 1 \n", + "15 0 1 \n", + "Очищенная таблица:\n", + " Place Position Number Name League One FA Cup League Cup \\\n", + "0 1 MF 4 Luke Summerfield 5 0 0 \n", + "1 2 MF 17 Paul Parry 3 0 1 \n", + "2 3 FW 24 Tom Eaves 3 0 0 \n", + "3 =4 MF 11 Jon Taylor 1 0 0 \n", + "4 =4 DF 5 Darren Jones * 2 0 0 \n", + "5 =4 MF 23 Adam Reach * 2 0 0 \n", + "6 =4 MF 22 Ryan Woods 2 0 0 \n", + "7 =4 FW 26 Curtis Main * 2 0 0 \n", + "8 =5 MF 2 Jermaine Grandison 1 0 0 \n", + "9 =5 MF 8 Dave McAllister 1 0 0 \n", + "10 =5 FW 15 Aaron Wildig 1 0 0 \n", + "11 =5 FW 29 Asa Hall 1 0 0 
\n", + "12 =5 FW 7 Gozie Ugwu* 1 0 0 \n", + "13 =5 DF 18 Connor Goldson 1 0 0 \n", + "14 =5 MF 10 Nicky Wroe* 1 0 0 \n", + "15 =5 FW 9 Tom Bradshaw 1 0 0 \n", + "\n", + " Football League Trophy Total \n", + "0 0 5 \n", + "1 0 4 \n", + "2 0 3 \n", + "3 1 2 \n", + "4 0 2 \n", + "5 0 2 \n", + "6 0 2 \n", + "7 0 2 \n", + "8 0 1 \n", + "9 0 1 \n", + "10 0 1 \n", + "11 0 1 \n", + "12 0 1 \n", + "13 0 1 \n", + "14 0 1 \n", + "15 0 1 \n", + "\n", + "==================================================\n", + "\n", + "=== Таблица 38/20 (Индекс в train: 12432) ===\n", + "Оригинальная таблица: csv/201-csv/42.csv\n", + "Размер: 7 строк, 4 колонок\n", + "--------------------------------------------------\n", + "Оригинальная таблица:\n", + " Year Title Role \\\n", + "0 1980 Hawaii Five-O Neal Forrester \n", + "1 1982 American Playhouse Jed Jenkins \n", + "2 1991 Saturday Night Live Host \n", + "3 1993 Frasier Doug \n", + "4 1995 Saturday Night Live Host \n", + "5 2010 Planet Sheen Glonb \n", + "6 2012–present The Newsroom Will McAvoy \n", + "\n", + " Notes \n", + "0 Episode: \\The Flight of the Jewels\\\"\" \n", + "1 Episode: \\The Fifth of July\\\"\" \n", + "2 Episode: Jeff Daniels/Color Me Badd \n", + "3 Episode: \\Here's Looking at You\\\"\" \n", + "4 Episode: Jeff Daniels/Luscious Jackson \n", + "5 Episode: \\What's Up Chock?\\\"\" \n", + "6 19 episodes\\nPrimetime Emmy Award for Outstand... 
\n", + "Очищенная таблица:\n", + " Year Title Role \\\n", + "0 1980.0 Hawaii Five-O Neal Forrester \n", + "1 1982.0 American Playhouse Jed Jenkins \n", + "2 1991.0 Saturday Night Live Host \n", + "3 1993.0 Frasier Doug \n", + "4 1995.0 Saturday Night Live Host \n", + "5 2010.0 Planet Sheen Glonb \n", + "6 NaN The Newsroom Will McAvoy \n", + "\n", + " Notes \n", + "0 Episode: \\The Flight of the Jewels\\\"\" \n", + "1 Episode: \\The Fifth of July\\\"\" \n", + "2 Episode: Jeff Daniels/Color Me Badd \n", + "3 Episode: \\Here's Looking at You\\\"\" \n", + "4 Episode: Jeff Daniels/Luscious Jackson \n", + "5 Episode: \\What's Up Chock?\\\"\" \n", + "6 19 episodes\\nPrimetime Emmy Award for Outstand... \n", + "\n", + "==================================================\n", + "\n", + "=== Таблица 39/20 (Индекс в train: 13202) ===\n", + "Оригинальная таблица: csv/203-csv/608.csv\n", + "Размер: 11 строк, 6 колонок\n", + "--------------------------------------------------\n", + "Оригинальная таблица:\n", + " Rank Nation Gold Silver Bronze Total\n", + "0 1 Chinese Taipei (TPE) 2 0 0 2\n", + "1 1 Russia (RUS) 2 0 0 2\n", + "2 3 Great Britain (GBR) 1 0 0 1\n", + "3 4 Slovakia (SVK) 0 2 2 4\n", + "4 5 China (CHN) 0 2 1 3\n", + "5 6 Mexico (MEX) 0 1 1 2\n", + "6 7 Germany (GER) 0 0 2 2\n", + "7 8 South Korea (KOR) 0 0 1 1\n", + "8 8 Switzerland (SUI) 0 0 1 1\n", + "9 8 Thailand (THA) 0 0 1 1\n", + "10 8 Uzbekistan (UZB) 0 0 1 1\n", + "Очищенная таблица:\n", + " Rank Nation Gold Silver Bronze Total Nation_base \\\n", + "0 1 Chinese Taipei (TPE) 2 0 0 2 Chinese Taipei \n", + "1 1 Russia (RUS) 2 0 0 2 Russia \n", + "2 3 Great Britain (GBR) 1 0 0 1 Great Britain \n", + "3 4 Slovakia (SVK) 0 2 2 4 Slovakia \n", + "4 5 China (CHN) 0 2 1 3 China \n", + "5 6 Mexico (MEX) 0 1 1 2 Mexico \n", + "6 7 Germany (GER) 0 0 2 2 Germany \n", + "7 8 South Korea (KOR) 0 0 1 1 South Korea \n", + "8 8 Switzerland (SUI) 0 0 1 1 Switzerland \n", + "9 8 Thailand (THA) 0 0 1 1 Thailand \n", + "10 8 
Uzbekistan (UZB) 0 0 1 1 Uzbekistan \n", + "\n", + " Nation_meta \n", + "0 TPE \n", + "1 RUS \n", + "2 GBR \n", + "3 SVK \n", + "4 CHN \n", + "5 MEX \n", + "6 GER \n", + "7 KOR \n", + "8 SUI \n", + "9 THA \n", + "10 UZB \n", + "\n", + "==================================================\n", + "\n", + "=== Таблица 40/20 (Индекс в train: 2615) ===\n", + "Оригинальная таблица: csv/203-csv/148.csv\n", + "Размер: 9 строк, 5 колонок\n", + "--------------------------------------------------\n", + "Оригинальная таблица:\n", + " Date Name Nationality Tonnage\\n(GRT) \\\n", + "0 18 January 1940 Flandria Sweden 1,179 \n", + "1 19 January 1940 Patria Sweden 1,188 \n", + "2 11 February 1940 Linda Estonia 1,213 \n", + "3 4 May 1940 San Tiburcio United Kingdom 5,995 \n", + "4 9 May 1940 Doris French Navy 552 \n", + "5 11 May 1940 Tringa United Kingdom 1,930 \n", + "6 11 May 1940 Viiu Estonia 1,908 \n", + "7 23 May 1940 Sigurd Faulbaum Belgium 3,256 \n", + "8 11 May 1944 Shtorm Soviet Union 412 \n", + "\n", + " Fate \n", + "0 Sunk \n", + "1 Sunk \n", + "2 Sunk \n", + "3 Sunk (mine) \n", + "4 Sunk \n", + "5 Sunk \n", + "6 Sunk \n", + "7 Sunk \n", + "8 Damaged \n", + "Очищенная таблица:\n", + " Date Name Nationality Tonnage (GRT) \\\n", + "0 18 January 1940 Flandria Sweden 1179 \n", + "1 19 January 1940 Patria Sweden 1188 \n", + "2 11 February 1940 Linda Estonia 1213 \n", + "3 4 May 1940 San Tiburcio United Kingdom 5995 \n", + "4 9 May 1940 Doris French Navy 552 \n", + "5 11 May 1940 Tringa United Kingdom 1930 \n", + "6 11 May 1940 Viiu Estonia 1908 \n", + "7 23 May 1940 Sigurd Faulbaum Belgium 3256 \n", + "8 11 May 1944 Shtorm Soviet Union 412 \n", + "\n", + " Fate Date_datetime \n", + "0 Sunk 1940-01-18 \n", + "1 Sunk 1940-01-19 \n", + "2 Sunk 1940-02-11 \n", + "3 Sunk (mine) 1940-05-04 \n", + "4 Sunk 1940-05-09 \n", + "5 Sunk 1940-05-11 \n", + "6 Sunk 1940-05-11 \n", + "7 Sunk 1940-05-23 \n", + "8 Damaged 1944-05-11 \n", + "\n", + 
"==================================================\n", + "\n" + ] + } + ], + "source": [ + "import random\n", + "\n", + "total_tables = len(train.context)\n", + "random.seed(42) # устанавливаем seed отдельно\n", + "random_indices = random.sample(range(total_tables), 40)\n", + "\n", + "for i, idx in enumerate(random_indices, start=1):\n", + " file_path = 'data/' + train.context.iloc[idx]\n", + " \n", + " try:\n", + " # Читаем таблицу\n", + " current_df = pd.read_csv(file_path)\n", + " clean_df = normalize_table(file_path)\n", + " print(f\"=== Таблица {i}/20 (Индекс в train: {idx}) ===\")\n", + " print(f\"Оригинальная таблица: {train.context.iloc[idx]}\")\n", + " print(f\"Размер: {current_df.shape[0]} строк, {current_df.shape[1]} колонок\")\n", + " print(\"-\" * 50)\n", + " \n", + " print(f\"Оригинальная таблица:\\n {current_df}\")\n", + " print(f\"Очищенная таблица:\\n {clean_df}\")\n", + " print(\"\\n\" + \"=\"*50 + \"\\n\")\n", + " \n", + " except Exception as e:\n", + " print(f\"Ошибка при чтении файла {file_path}: {e}\\n\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 269, + "id": "96fbf4b1-60f2-45d5-9b39-2dee058f9710", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
YearDivisionLeagueRegular SeasonPlayoffsOpen CupAvg. Attendance
020012USL A-League4th, WesternQuarterfinalsDid not qualify7169
120022USL A-League2nd, Pacific1st RoundDid not qualify6260
220032USL A-League3rd, PacificDid not qualifyDid not qualify5871
320042USL A-League1st, WesternQuarterfinals4th Round5628
420052USL First Division5thQuarterfinals4th Round6028
520062USL First Division11thDid not qualify3rd Round5575
620072USL First Division2ndSemifinals2nd Round6851
720082USL First Division11thDid not qualify1st Round8567
820092USL First Division1stSemifinals3rd Round9734
920102USSF D-2 Pro League3rd, USL (3rd)Quarterfinals3rd Round10727
\n", + "
" + ], + "text/plain": [ + " Year Division League Regular Season Playoffs \\\n", + "0 2001 2 USL A-League 4th, Western Quarterfinals \n", + "1 2002 2 USL A-League 2nd, Pacific 1st Round \n", + "2 2003 2 USL A-League 3rd, Pacific Did not qualify \n", + "3 2004 2 USL A-League 1st, Western Quarterfinals \n", + "4 2005 2 USL First Division 5th Quarterfinals \n", + "5 2006 2 USL First Division 11th Did not qualify \n", + "6 2007 2 USL First Division 2nd Semifinals \n", + "7 2008 2 USL First Division 11th Did not qualify \n", + "8 2009 2 USL First Division 1st Semifinals \n", + "9 2010 2 USSF D-2 Pro League 3rd, USL (3rd) Quarterfinals \n", + "\n", + " Open Cup Avg. Attendance \n", + "0 Did not qualify 7169 \n", + "1 Did not qualify 6260 \n", + "2 Did not qualify 5871 \n", + "3 4th Round 5628 \n", + "4 4th Round 6028 \n", + "5 3rd Round 5575 \n", + "6 2nd Round 6851 \n", + "7 1st Round 8567 \n", + "8 3rd Round 9734 \n", + "9 3rd Round 10727 " + ] + }, + "execution_count": 269, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import importlib\n", + "import normalize # импортируем сам модуль целиком\n", + "\n", + "# Принудительно перезагружаем его в памяти\n", + "importlib.reload(normalize)\n", + "\n", + "# Заново вытаскиваем обновленную функцию\n", + "from normalize import normalize_table\n", + "# Сюда будем складывать чистые датафреймы, либо сразу сохранять на диск\n", + "processed_tables = {}\n", + "file_path = 'data/' + train.context.iloc[0]\n", + "\n", + "# Запускаем нашу функцию\n", + "clean_df = normalize_table(file_path)\n", + "\n", + "if not clean_df.empty:\n", + " # Сохраняем результат в словарь по индексу\n", + " processed_tables[idx] = clean_df\n", + " \n", + "clean_df" + ] + }, + { + "cell_type": "code", + "execution_count": 271, + "id": "3df13ff5-a3c4-4b07-b76b-ec1de8509492", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Year Division League Regular Season Playoffs \\\n", + "0 
2001 2 USL A-League 4th, Western Quarterfinals \n", + "1 2002 2 USL A-League 2nd, Pacific 1st Round \n", + "2 2003 2 USL A-League 3rd, Pacific Did not qualify \n", + "3 2004 2 USL A-League 1st, Western Quarterfinals \n", + "4 2005 2 USL First Division 5th Quarterfinals \n", + "5 2006 2 USL First Division 11th Did not qualify \n", + "6 2007 2 USL First Division 2nd Semifinals \n", + "7 2008 2 USL First Division 11th Did not qualify \n", + "8 2009 2 USL First Division 1st Semifinals \n", + "9 2010 2 USSF D-2 Pro League 3rd, USL (3rd) Quarterfinals \n", + "\n", + " Open Cup Avg. Attendance \n", + "0 Did not qualify 7,169 \n", + "1 Did not qualify 6,260 \n", + "2 Did not qualify 5,871 \n", + "3 4th Round 5,628 \n", + "4 4th Round 6,028 \n", + "5 3rd Round 5,575 \n", + "6 2nd Round 6,851 \n", + "7 1st Round 8,567 \n", + "8 3rd Round 9,734 \n", + "9 3rd Round 10,727 \n" + ] + } + ], + "source": [ + "print(pd.read_csv('data/' + train.context.iloc[0]))" + ] + }, + { + "cell_type": "code", + "execution_count": 251, + "id": "d8636de9-d99d-481b-a6f9-2cf7bc28c241", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Nicky English'" + ] + }, + "execution_count": 251, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "top_player = df[\"Player\"].value_counts().idxmax()\n", + "appearance_count = df[\"Player\"].value_counts().max()\n", + "top_player" + ] + }, + { + "cell_type": "code", + "execution_count": 107, + "id": "8a26ce42-4604-456e-ae54-11e5d90d40f1", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "10,000 m\n" + ] + } + ], + "source": [ + "# Создаем колонку с числовым значением дистанции\n", + "df['distance_num'] = df['Notes'].str.replace(',', '').str.extract(r'(\\d+)').astype(int)\n", + "\n", + "# Фильтруем All-Africa Games и берем строку с максимальной дистанцией\n", + "result = df[df['Competition'] == 'All-Africa Games'].loc[lambda x: x['distance_num'].idxmax()]\n", + "\n", + 
"print(result['Notes'])" + ] + }, + { + "cell_type": "code", + "execution_count": 265, + "id": "88d0e623-f039-4e0a-ac98-561f16f16084", + "metadata": {}, + "outputs": [], "source": [ - "! pip freeze > requirements.txt" + "%load_ext autoreload\n", + "%autoreload 2" ] }, { "cell_type": "code", "execution_count": null, - "id": "c752e932-710b-4e0c-ac77-d0ec621c0e9c", + "id": "50d94380-c605-4737-a498-063c2dced377", "metadata": {}, "outputs": [], "source": [] diff --git a/WTQ/Qwen/normalize.py b/WTQ/Qwen/normalize.py new file mode 100644 index 0000000..38132d0 --- /dev/null +++ b/WTQ/Qwen/normalize.py @@ -0,0 +1,116 @@ +import re +import io +import numpy as np +import pandas as pd + +def normalize_table(file_path: str) -> pd.DataFrame: + """ + Выполняет полный цикл отказоустойчивого чтения, глобальной очистки + и нормализации признаков для одной "дикой" веб-таблицы. + """ + # ===================================================================== + # ШАГ 1: Отказоустойчивое чтение файла (Защита от ошибок токенизации) + # ===================================================================== + try: + df = pd.read_csv(file_path) + except Exception: + try: + # Если упало (разное количество колонок в строках), чистим сырой текст + with open(file_path, 'r', encoding='utf-8') as f: + lines = f.readlines() + if not lines: + return pd.DataFrame() + + header_cnt = len(lines[0].split(',')) + clean_lines = [] + for line in lines: + parts = line.split(',') + if len(parts) > header_cnt: + # Склеиваем избыточные элементы (хвост строки) в одну ячейку + parts = parts[:header_cnt-1] + [" ".join(parts[header_cnt-1:])] + clean_lines.append(",".join(parts)) + df = pd.read_csv(io.StringIO("".join(clean_lines))) + except Exception: + # Если файл абсолютно поврежден, возвращаем пустой DF, чтобы не рушить цикл + return pd.DataFrame() + + if df.empty: + return df + + # ===================================================================== + # ШАГ 2: Глобальная очистка структуры, пробелов и псевдо-NaN 
+ # ===================================================================== + # 1. Вычищаем системные переносы строк (\n) из названий колонок + df.columns = df.columns.str.replace(r'\n', ' ', regex=True).str.strip() + + # 2. Приводим всё к строкам для безопасного regex-анализа, сохраняя оригинальные NaN + df = df.astype(str).replace(r'^nan$', np.nan, regex=True) + + # 3. Заменяем текстовые маркеры пропусков на честный np.nan + garbage_nas = [r'^——$', r'^none$', r'^NaN$', r'^—$', r'^\s*$'] + for pattern in garbage_nas: + df = df.replace(pattern, np.nan, regex=True) + + # 4. Убираем \n внутри самих ячеек и срезаем пробелы по краям + for col in df.columns: + if df[col].dtype == 'object': + df[col] = df[col].str.replace(r'\n', ' ', regex=True).str.strip() + + # ===================================================================== + # ШАГ 3: Умная нормализация и деструктуризация колонок + # ===================================================================== + original_columns = list(df.columns) + + for col in original_columns: + col_filled = df[col].dropna() + if col_filled.empty: + continue + + total_filled = len(col_filled) + + # --- Эвристика А: Текст со скобками -> "Tommy Persson (SWE)" или "Raymond Felton (17)" --- + has_parentheses = col_filled.str.contains(r'.+?\s*\(.+?\)', regex=True) + if has_parentheses.sum() > total_filled * 0.5: + extracted = df[col].str.extract(r'(.+?)\s*\((.+?)\)') + df[f"{col}_base"] = extracted[0].str.strip() + df[f"{col}_meta"] = extracted[1].str.strip() + continue + + # --- Эвристика Б: Счета матчей и результаты -> "W 28–21", "17-5", "2–4 (aet)" --- + has_scores = col_filled.str.contains(r'\d+[-–:–]\d+', regex=True) + if has_scores.sum() > total_filled * 0.5: + extracted = df[col].str.extract(r'(?:[A-Za-z]\s+)?(\d+)[\–\-:–](\d+)') + df[f"{col}_score1"] = pd.to_numeric(extracted[0], errors='coerce') + df[f"{col}_score2"] = pd.to_numeric(extracted[1], errors='coerce') + continue + + # --- Эвристика В: Имперские длины -> 47'9" --- + 
has_imperial = col_filled.str.contains(r'\d+\'\d+"', regex=True) + if has_imperial.sum() > total_filled * 0.3: + def _to_meters(val): + if pd.isna(val): return np.nan + match = re.match(r'(\d+)\'(\d+)"', str(val)) + if match: + feet, inches = map(int, match.groups()) + return round((feet * 0.3048) + (inches * 0.0254), 2) + return np.nan + df[f"{col}_meters"] = df[col].apply(_to_meters) + continue + + # --- Эвристика Г: Авто-приведение типов к Numeric (валюты, разделители тысяч) --- + # Удаляем знаки доллара и запятые (например, "18,108" -> "18108") + clean_num_attempt = df[col].str.replace(r'[\$,]', '', regex=True) + numeric_converted = pd.to_numeric(clean_num_attempt, errors='coerce') + if numeric_converted.notna().sum() > total_filled * 0.8: + df[col] = numeric_converted + continue + + # --- Эвристика Д: Даты ("June 15", "11/09/2013*", "September 13, 1987") --- + if 'date' in col.lower() or 'year' in col.lower(): + # Очищаем маркеры сносок вроде астерисков в конце даты ("2013*" -> "2013") + clean_date_attempt = df[col].str.replace(r'\*$', '', regex=True) + date_converted = pd.to_datetime(clean_date_attempt, errors='coerce') + if date_converted.notna().sum() > total_filled * 0.5: + df[f"{col}_datetime"] = date_converted + + return df \ No newline at end of file From e633a2ab73a48b0e83692f3b20a4830a9f202bde Mon Sep 17 00:00:00 2001 From: master Date: Fri, 22 May 2026 19:31:29 +0800 Subject: [PATCH 05/10] =?UTF-8?q?=D0=B4=D0=BE=D0=B1=D0=B0=D0=B2=D0=BB?= =?UTF-8?q?=D0=B5=D0=BD=20=D0=BD=D0=BE=D1=80=D0=BC=D0=B0=D0=BB=D0=B8=D0=B7?= =?UTF-8?q?=D0=B0=D1=82=D0=BE=D1=80?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 4 ++ WTQ/Qwen/check_result.py | 82 ++++++++++++++++++++++++ WTQ/Qwen/config.py | 15 +++-- WTQ/Qwen/get_pandas_code.py | 44 +++++++++++++ WTQ/Qwen/main.py | 95 ++++++++++++++++++++++++++++ WTQ/Qwen/send_message.py | 53 ++++++++++++++++ utils/normalize.py | 118 +++++++++++++++++++++++++++++++++++ 
utils/type_check.py | 5 +- utils/type_check_duckling.py | 19 +++--- 9 files changed, 418 insertions(+), 17 deletions(-) create mode 100644 WTQ/Qwen/check_result.py create mode 100644 WTQ/Qwen/get_pandas_code.py create mode 100644 WTQ/Qwen/main.py create mode 100644 WTQ/Qwen/send_message.py create mode 100644 utils/normalize.py diff --git a/.gitignore b/.gitignore index 9078939..cdff11d 100644 --- a/.gitignore +++ b/.gitignore @@ -139,3 +139,7 @@ cython_debug/ # idea .idea/ +/datasets/WikiTableQuestions/ +/utils/draft.py +/utils/Amazon Sale Report.csv +/utils/draft_2.py diff --git a/WTQ/Qwen/check_result.py b/WTQ/Qwen/check_result.py new file mode 100644 index 0000000..cf3c3aa --- /dev/null +++ b/WTQ/Qwen/check_result.py @@ -0,0 +1,82 @@ +from utils.normalize import convert_dataset_types +import pandas as pd +import numpy as np +import re +import json + + +import re +import json + +def extract_code_from_response(response): + if not response: + return response + + # 1. Поиск JSON: от первой { до последней } + start = response.find('{') + end = response.rfind('}') + if start != -1 and end != -1: + # Пробуем отрезать от 0 до 5 лишних символов в конце + for cut in range(0, 6): + candidate = response[start:end+1-cut] + try: + data = json.loads(candidate) + if 'PANDA' in data: + code = data['PANDA'] + # Очистка кода от мусора в конце + code = re.sub(r'[\]\}]+$', '', code) # удалить ] } в конце + # Если код обёрнут в квадратные скобки, снимаем их + if code.startswith('[') and code.endswith(']'): + code = code[1:-1].strip() + return code + except: + continue + + # 2. 
Fallback: регулярное выражение, захватывающее всё до последней кавычки перед } или ] + match = re.search(r'"PANDA"\s*:\s*"([^"]*)"\s*[\}\]\]]', response, re.DOTALL) + if match: + code = match.group(1).strip() + code = re.sub(r'[\]\}]+$', '', code) + return code + + return response + + +def normalize_value(value): + df_temp = pd.DataFrame({'col': [value]}) + df_norm = convert_dataset_types(df_temp) + return df_norm['col'].iloc[0] + + +def evaluate_code(code_str, df, expected): + # Нормализуем таблицу + df_norm = convert_dataset_types(df) + + # Извлекаем чистый код из ответа модели + clean_code = extract_code_from_response(code_str) + + # Выполняем код на нормализованной таблице + try: + result = eval(clean_code, {'df': df_norm, 'pd': pd, 'np': np}) + except Exception as e: + print(f"Execution error: {e}") + return False + + # Приводим результат к единому формату (строка, разделитель для списков) + if isinstance(result, list): + norm_result = '|'.join(str(x) for x in result) + elif isinstance(result, pd.Series): + if len(result) == 1: + norm_result = normalize_value(result.iloc[0]) + else: + norm_result = '|'.join(str(x) for x in result.tolist()) + else: + norm_result = normalize_value(result) + + # Нормализуем ожидаемый ответ + try: + norm_expected = normalize_value(expected) + except Exception: + norm_expected = expected + + return norm_result == norm_expected \ No newline at end of file diff --git a/WTQ/Qwen/config.py b/WTQ/Qwen/config.py index f4a43ba..1540ed7 100644 --- a/WTQ/Qwen/config.py +++ b/WTQ/Qwen/config.py @@ -7,12 +7,18 @@ 3. 
Ensure the output is concise, correct, and when run, it outputs the correct given answer, and strictly follows the Json format: {{"PANDA": ""}} -### Table schema +If column names have spaces or special characters, use df['column name'] +Use pd.to_datetime() for date comparisons +Return the actual value (not index or position) + +### Table Schema: {table} -### Query -{query} +### Column Data Types: +{column_types} +### Query: +{query} ''' logic_prompt = '''You are an expert in Python with a specialization in pandas. Your task is to verify and correct a given pandas code that translates a natural language statement into a pandas expression. The corrected pandas code must accurately evaluate the truth of the statement when applied to the given table. Requirements: @@ -53,5 +59,4 @@ ### Label {label} -''' - +''' \ No newline at end of file diff --git a/WTQ/Qwen/get_pandas_code.py b/WTQ/Qwen/get_pandas_code.py new file mode 100644 index 0000000..14b0293 --- /dev/null +++ b/WTQ/Qwen/get_pandas_code.py @@ -0,0 +1,44 @@ +from config import system_prompt +import pandas as pd +from send_message import send_message_async, ModelMessageDict +import asyncio + + +async def get_pandas(question, tbl, max_rows=20): + tbl_input = tbl.head(max_rows) if len(tbl) > max_rows else tbl + + # Получаем типы данных колонок + column_types = {col: str(dtype) for col, dtype in tbl_input.dtypes.items()} + types_info = "\n".join([f" - {col}: {dtype}" for col, dtype in column_types.items()]) + + # Форматируем system_prompt с таблицей, типами и вопросом + formatted_system_prompt = system_prompt.format( + table=tbl_input.to_string(index=False), + column_types=types_info, + query=question + ) + + # Формируем сообщение пользователя + user_msg = ModelMessageDict(role='user') + user_msg.add_text_content( + f"QUESTION: {question}\n" + f"AVAILABLE COLUMNS: {', '.join(tbl_input.columns)}\n" + f"COLUMN TYPES:\n{types_info}\n" + f"TABLE DATA:\n{tbl_input.to_string(index=False)}" + ) + + success, responses = 
await send_message_async( + messages=[ + {"role": "system", "content": formatted_system_prompt}, + user_msg + ], + base_url="http://192.168.19.127:9886/v1", + api_key='EMPTY', + model_name='Qwen/Qwen3-4B-Instruct-2507', + temperature=0, + ) + + if not success: + return None, f"LLM error: {responses}" + + return responses[0] \ No newline at end of file diff --git a/WTQ/Qwen/main.py b/WTQ/Qwen/main.py new file mode 100644 index 0000000..c7fb04f --- /dev/null +++ b/WTQ/Qwen/main.py @@ -0,0 +1,95 @@ +import pandas as pd +import numpy as np +import asyncio +import random +from get_pandas_code import get_pandas +from check_result import evaluate_code +from config import system_prompt +from utils.normalize import convert_dataset_types +from check_result import extract_code_from_response + + +train = pd.read_csv('../../datasets/WikiTableQuestions/training.tsv', sep='\t') + +# Выбираем случайные индексы +random.seed(101) +random_indices = random.sample(range(len(train)), 20) + + +def run_code_and_get_result(code_str, df): + """Выполняет код на нормализованной таблице и возвращает (результат, ошибка)""" + df_norm = convert_dataset_types(df) + clean_code = extract_code_from_response(code_str) + try: + result = eval(clean_code, {'df': df_norm, 'pd': pd, 'np': np}) + return result, None + except Exception as e: + return None, str(e) + + +async def main(): + results = [] + syntax_errors = 0 + correct_answers = 0 + total = len(random_indices) + + for i, idx in enumerate(random_indices, 1): + print(f"\n{'='*80}") + print(f"--- Test {i}/{total} (train index: {idx}) ---") + + csv = pd.read_csv('../../datasets/WikiTableQuestions/' + train.context.iloc[idx]) + question = train.utterance.iloc[idx] + targetValue = train.targetValue.iloc[idx] + + # Нормализуем таблицу один раз + df_norm = convert_dataset_types(csv) + + # Передаём в get_pandas нормализованную таблицу + code = await get_pandas(question, df_norm) + print(f"Generated code: {code}") + + # Выполняем код на той же 
нормализованной таблице + result_val, error = run_code_and_get_result(code, df_norm) + print(f"Execution result: {result_val}") + if error: + print(f"Execution error: {error}") + syntax_errors += 1 + + print(f"Expected value: {targetValue}") + + # Выводим информацию о таблицах (оригинальной и нормализованной) + print("\n--- Original table (first 5 rows) ---") + print(csv.head().to_string()) + print("\n--- Normalized table (first 5 rows) ---") + print(df_norm.head().to_string()) + print("\n--- Column types (original) ---") + print(csv.dtypes.to_string()) + print("\n--- Column types (normalized) ---") + print(df_norm.dtypes.to_string()) + + # Проверяем совпадение + match = evaluate_code(code, df_norm, targetValue) + print(f"Match: {match}") + + if match: + correct_answers += 1 + results.append(match) + + # Статистика + print("\n" + "="*80) + print("СТАТИСТИКА") + print("="*80) + print(f"Всего запусков: {total}") + print(f"С синтаксической ошибкой: {syntax_errors}") + print(f"Без синтаксической ошибки: {total - syntax_errors}") + print(f"Правильных ответов: {correct_answers}") + print(f"Точность (от всех запусков): {correct_answers / total * 100:.1f}%") + print(f"Точность (только без ошибок): {correct_answers / (total - syntax_errors) * 100:.1f}%" if syntax_errors < total else "Точность (только без ошибок): N/A") + print("="*80) + + return results + + +if __name__ == "__main__": + results = asyncio.run(main()) + print(f"\nFinal results: {results}") \ No newline at end of file diff --git a/WTQ/Qwen/send_message.py b/WTQ/Qwen/send_message.py new file mode 100644 index 0000000..dfc6494 --- /dev/null +++ b/WTQ/Qwen/send_message.py @@ -0,0 +1,53 @@ +from openai import OpenAI, AsyncOpenAI +import base64 +from typing import List, Dict, Any, Callable +import inspect +def get_kwargs(kwargs: Dict[str, Any], func: Callable) -> Dict[str, Any]: + """Вытаскивает аргументы из kwargs по сигнатуре функции func.""" + sig = inspect.signature(func) + return {key: value for key, value in 
kwargs.items() if key in sig.parameters} + +async def send_message_async(messages, base_url: str = "http://192.168.19.127:9886/v1", + api_key: str = 'EMPTY', + model_name: str = 'Qwen/Qwen3-4B-Instruct-2507', + **kwargs): + """Асинхронная версия отправки сообщений.""" + client = AsyncOpenAI(api_key=api_key, base_url=base_url, **get_kwargs(kwargs, AsyncOpenAI)) + try: + print(f"Generating content with model: {model_name}") + response = await client.chat.completions.create( + messages=messages, + model=model_name, + **get_kwargs(kwargs, client.chat.completions.create) + ) + return True, [answ.message.content for answ in response.choices] + except Exception as e: + print("Failed to call LLM: " + str(e)) + return False, None + +class ModelMessageDict(dict): + """Класс - словарь для удобного форматирования запроса к модели.""" + + def __init__(self, role: str = 'user'): + super().__init__() + self['role'] = role + self['content'] = '' + + def add_text_content(self, content: str): + self['content']+= content + + def add_img_content(self, source: str = 'image_url', path_to_img: str = None, url: str = None): + match source: + case 'image_url': + if path_to_img is not None: + with open(path_to_img, "rb") as f: + base64_image = base64.b64encode(f.read()).decode() + self['content'].append({ + 'type': 'image_url', + 'image_url': {'url': f"data:image/jpeg;base64,{base64_image}"} + }) + elif url is not None: + self['content'].append({ + 'type': 'image_url', + 'image_url': {'url': url} + }) \ No newline at end of file diff --git a/utils/normalize.py b/utils/normalize.py new file mode 100644 index 0000000..79155de --- /dev/null +++ b/utils/normalize.py @@ -0,0 +1,118 @@ +import re, pandas as pd, numpy as np +import sys + +sys.path.insert(0, '/home/master/PycharmProjects/semtab_serializer') + +from utils.type_check import analyze_dataset_parallel, clean_value, extract_number_string + + +def clean_column_name(col): + """Заменяет спецсимволы в имени колонки на _""" + # Заменяем 
переводы строк, скобки, пробелы, дефисы и другие спецсимволы на _ + col = re.sub(r'[\n\r\t\(\)\[\]\{\}\-\s]+', '_', str(col)) + # Убираем дублирующиеся подчеркивания + col = re.sub(r'_+', '_', col) + # Убираем подчеркивание в начале и конце + col = col.strip('_') + return col + + +def convert_value(v, t, keep_original_on_error=True): + # Если это специальный пропуск - возвращаем pd.NA + if pd.isna(v) or (isinstance(v, str) and v.lower() in ['nan', 'na', 'n/a', 'nill', 'none', '—', '?']): + return pd.NA + + # Сохраняем оригинал для возможного возврата + original = v + s = clean_value(v) + if not s: + return original if keep_original_on_error else pd.NA + + if t == 'bool': + if re.match(r'^(true|yes|да|истина|1|\+)$', s, re.I): + return True + if re.match(r'^(false|no|нет|ложь|0|-|\[ \])$', s, re.I): + return False + return original if keep_original_on_error else pd.NA + + if t == 'int': + c = re.sub(r'[ ,]', '', extract_number_string(s)) + # Проверка на диапазон (содержит тире/минус не в начале) + if re.search(r'\d+[–-]\d+', c): + return original if keep_original_on_error else pd.NA + try: + if '.' in c: + result = int(float(c)) if re.match(r'^\d+\.0+$', c) else None + return result if result is not None else (original if keep_original_on_error else pd.NA) + return int(c) + except: + return original if keep_original_on_error else pd.NA + + if t == 'float': + c = extract_number_string(s) + # Проверка на диапазон + if re.search(r'\d+[–-]\d+', c): + return original if keep_original_on_error else pd.NA + if ',' in c and '.' 
not in c: + c = c.replace(',', '.') + c = re.sub(r'[ ,]', '', c) + try: + return float(c) + except: + return original if keep_original_on_error else pd.NA + + if t in ('date', 'datetime'): + try: + r = pd.to_datetime(s, errors='coerce') + if pd.notna(r): + return r.normalize() if t == 'date' else r + return original if keep_original_on_error else pd.NA + except: + return original if keep_original_on_error else pd.NA + + if t == 'time': + for p in [re.compile(x) for x in [r'^\d{1,2}:\d{1,2}$', r'^\d{1,2}:\d{1,2}:\d{1,2}$', + r'^\d{1,2}:\d{1,2}:\d{1,2}\.\d+$', r'^\d{1,2}:\d{1,2}\s*[APap][Mm]$', + r'^\d{1,2}:\d{1,2}:\d{1,2}\s*[APap][Mm]$']]: + if p.match(s): + try: + dt = pd.to_datetime('1970-01-01 ' + s, errors='coerce') + return dt.time() if pd.notna(dt) else (original if keep_original_on_error else pd.NA) + except: + continue + return original if keep_original_on_error else pd.NA + + return s + + +def convert_col(s, t): + # Для колонок с типом float/int применяем специальную логику + # которая сохранит оригинальные строки для непреобразованных значений + if t in ('float', 'int', 'bool', 'date', 'datetime', 'time'): + # Применяем convert_value с сохранением оригинала + converted = s.apply(lambda x: convert_value(x, t, keep_original_on_error=True)) + + # Проверяем, не получилась ли колонка со смешанными типами + # Если да - оставляем как есть (строками где не преобразовалось) + return converted + + # Для строк - просто приводим к типу string + return s.astype('string') + + +def convert_dataset_types(df, max_workers=None): + info = analyze_dataset_parallel(df, max_workers) + res = df.copy() + + # Переименовываем колонки, заменяя спецсимволы на _ + res.columns = [clean_column_name(col) for col in res.columns] + + for col, (t, _) in info.items(): + # Находим новое имя колонки после переименования + new_col = clean_column_name(col) + if t != 'None' and new_col in res.columns: + try: + res[new_col] = convert_col(df[col], t) + except: + pass + return res \ No newline at 
end of file diff --git a/utils/type_check.py b/utils/type_check.py index c7e7450..be379cd 100644 --- a/utils/type_check.py +++ b/utils/type_check.py @@ -23,6 +23,9 @@ def clean_value(value: Any) -> str: def extract_number_string(s: str) -> str: """Подготовка строки для проверки на число (удаление валют, скобок, текста)""" + # Нормализация разных видов минусов и дефисов + s = s.replace('−', '-').replace('–', '-').replace('—', '-') + # Удаляем символы валют в начале/конце s = re.sub(r'^[$€£¥₽\s]*', '', s) s = re.sub(r'[$€£¥₽\s]*$', '', s) @@ -111,7 +114,7 @@ def check_type_comprehensive(data: Union[pd.Series, list, Any]) -> Tuple[str, in for value in values: if (pd.isna(value) or - (isinstance(value, str) and value.lower() in ['nan', 'na', 'n/a','nill'])): + (isinstance(value, str) and value.lower() in ['nan', 'na', 'n/a', 'nill', 'none','—','?'])): nan_count += 1 continue diff --git a/utils/type_check_duckling.py b/utils/type_check_duckling.py index 01ff9f5..7a9bc38 100644 --- a/utils/type_check_duckling.py +++ b/utils/type_check_duckling.py @@ -3,22 +3,19 @@ import requests import pandas as pd import json +import requests +# Создай сессию один раз глобально +session = requests.Session() def parse_text(text: Any, locale: str = 'en_US') -> List[Dict[str, Any]]: - """ - Парсит текст через Duckling. - - :param text: Текст для парсинга. Может быть любым типом, который может быть преобразован в str. - :param locale: Локаль для парсинга (по умолчанию 'en_US'). - :return: Список словарей, представляющих результаты парсинга Duckling. 
- """ text_str = str(text) if pd.notna(text) else "" try: - response = requests.post( + response = session.post( 'http://localhost:8000/parse', - data={'locale': locale, 'text': text_str} + data={'locale': locale, 'text': text_str}, + timeout=5 ) response.raise_for_status() return response.json() @@ -79,8 +76,8 @@ def df_to_duckling(df: pd.DataFrame, lines: int = 5) -> Dict[str, Tuple[str, int return aggregated_results -df = pd.read_csv('C:/Users/PC/semtab_serializer/tests/Amazon Sale Report.csv') -result = df_to_duckling(df, 3) +df = pd.read_csv('Amazon Sale Report.csv') +result = df_to_duckling(df, len(df)) for col, types_list in result.items(): print(col, types_list) From fb6e7ac449adb2c04e8b57e96d047e1b89a42f7a Mon Sep 17 00:00:00 2001 From: master Date: Sat, 23 May 2026 17:33:10 +0800 Subject: [PATCH 06/10] =?UTF-8?q?=D0=94=D0=BE=D0=B1=D0=B0=D0=B2=D0=BB?= =?UTF-8?q?=D0=B5=D0=BD=D1=8B=20=D0=BF=D1=80=D0=B0=D0=B2=D0=BA=D0=B8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- WTQ/Qwen/config.py | 49 +++++++++------ WTQ/Qwen/get_pandas_code.py | 34 ++++------ WTQ/Qwen/main.py | 120 +++++++++++++++--------------------- utils/normalize.py | 46 ++++++++------ utils/type_check.py | 3 +- 5 files changed, 121 insertions(+), 131 deletions(-) diff --git a/WTQ/Qwen/config.py b/WTQ/Qwen/config.py index 1540ed7..8d3ffd2 100644 --- a/WTQ/Qwen/config.py +++ b/WTQ/Qwen/config.py @@ -1,24 +1,37 @@ -system_prompt = '''You are a Python expert specializing in pandas. You are given a question and a table. Your task is to translate the given natural language question into -a single-line pandas expression. This expression, which acts like a query, must -be valid and executable so that running the pandas expression will output the -answer to the question. Consider the following: +system_prompt = ''' +You are a Python expert specializing in pandas. You are given a question and a table. 
Your task is to translate the given natural language question into a single-line pandas expression. This expression, which acts like a query, must be valid and executable so that running the pandas expression will output the answer to the question. Consider the following: 1. The table is represented as a pandas DataFrame named df. 2. Do not include explanations, comments, or multiline outputs. -3. Ensure the output is concise, correct, and when run, it outputs the correct -given answer, and strictly follows the Json format: {{"PANDA": ""}} +3. Ensure the output is concise, correct, and when run, it outputs the correct given answer, and strictly follows the Json format: {"PANDA": ""} +4. Use double quotes inside the pandas code and escape them with backslash. Example: df["Column"] not df['Column']. +5. Do not use double curly braces {{ }}. Use single curly braces { }. +6. Always use convert_type('...') for numeric and date constants from the question, even if they look like plain numbers. Example: df[df['Year'] == convert_type('2005')] not df[df['Year'] == 2005]. +Use convert_type, which converts string values into appropriate types: numbers (int/float) with automatic removal of extra characters (spaces, currencies, percentages, thousand separators, parentheses) and dates/times into pandas Timestamp recognizing various formats (ISO, European, American, with month names). If conversion is impossible, the function returns the original value unchanged. +Signature: convert_type(value: Any) -> Any +When to apply convert_type (only to constants from the question): +Filtering by number: df[df['Year'] == convert_type('2005')] +Filtering by range: df[df['Points'] > convert_type('79')] +Filtering by date: df[df['Date'] > convert_type('2000-01-01')] +Arithmetic with constants: convert_type('1000') + df['Bonus'] +String constant comparisons do NOT require convert_type: df[df['Team'] == 'Crettyard'] +Examples: + +Question: "Which team scored the most points?" 
+No constants from the question → convert_type not needed. +Answer: {{"PANDA": "df.loc[df['Points'].idxmax(), 'Team']"}} + +Question: "How many points did the team score in 2005?" +Answer: {{"PANDA": "df[df['Year'] == convert_type('2005')]['Points'].iloc[0]"}} + +Question: "What was the average attendance in 2005?" +Answer: {{"PANDA": "df[df['Year'] == convert_type('2005')]['Attendance'].mean()"}} + +Question: "How many years passed between 1996 and the last year when Republicans had a majority?" +Answer: {{"PANDA": "convert_type('1996') - df[df['Republican Party'] > df['Democratic Party']]['Year'].iloc[-2]"}} + +Question: "How many more passengers flew to Los Angeles than to Saskatoon?" +Answer: {{"PANDA": "df[df['City'] == 'United States, Los Angeles']['Passengers'].iloc[0] - df[df['City'] == 'Canada, Saskatoon']['Passengers'].iloc[0]"}} (values from the table are already numbers) -If column names have spaces or special characters, use df['column name'] -Use pd.to_datetime() for date comparisons -Return the actual value (not index or position) - -### Table Schema: -{table} - -### Column Data Types: -{column_types} - -### Query: -{query} ''' logic_prompt = '''You are an expert in Python with a specialization in pandas. Your task is to verify and correct a given pandas code that translates a natural language statement into a pandas expression. The corrected pandas code must accurately evaluate the truth of the statement when applied to the given table. 
Requirements: diff --git a/WTQ/Qwen/get_pandas_code.py b/WTQ/Qwen/get_pandas_code.py index 14b0293..cc6c658 100644 --- a/WTQ/Qwen/get_pandas_code.py +++ b/WTQ/Qwen/get_pandas_code.py @@ -2,40 +2,30 @@ import pandas as pd from send_message import send_message_async, ModelMessageDict import asyncio +from config import system_prompt +from utils.type_check import analyze_dataset_parallel -async def get_pandas(question, tbl, max_rows=20): - tbl_input = tbl.head(max_rows) if len(tbl) > max_rows else tbl - - # Получаем типы данных колонок - column_types = {col: str(dtype) for col, dtype in tbl_input.dtypes.items()} - types_info = "\n".join([f" - {col}: {dtype}" for col, dtype in column_types.items()]) +async def get_pandas(question: str, ser_tbl: str, tbl_types: dict, max_rows=20): - # Форматируем system_prompt с таблицей, типами и вопросом - formatted_system_prompt = system_prompt.format( - table=tbl_input.to_string(index=False), - column_types=types_info, - query=question - ) + system_message = ModelMessageDict(role = 'system') + system_message.add_text_content(system_prompt) - # Формируем сообщение пользователя - user_msg = ModelMessageDict(role='user') - user_msg.add_text_content( + user_message = ModelMessageDict(role='user') + user_message.add_text_content( f"QUESTION: {question}\n" - f"AVAILABLE COLUMNS: {', '.join(tbl_input.columns)}\n" - f"COLUMN TYPES:\n{types_info}\n" - f"TABLE DATA:\n{tbl_input.to_string(index=False)}" + f"AVAILABLE COLUMNS: {', '.join(list(tbl_types.keys()))}\n" + #f"COLUMN TYPES:\n{tbl_types}\n" + f"TABLE DATA:\n{ser_tbl}" ) success, responses = await send_message_async( - messages=[ - {"role": "system", "content": formatted_system_prompt}, - user_msg + messages=[system_message, user_message ], base_url="http://192.168.19.127:9886/v1", api_key='EMPTY', model_name='Qwen/Qwen3-4B-Instruct-2507', - temperature=0, + temperature=0.3, ) if not success: diff --git a/WTQ/Qwen/main.py b/WTQ/Qwen/main.py index c7fb04f..f289399 100644 --- 
a/WTQ/Qwen/main.py +++ b/WTQ/Qwen/main.py @@ -1,95 +1,75 @@ import pandas as pd import numpy as np import asyncio -import random +import json from get_pandas_code import get_pandas from check_result import evaluate_code from config import system_prompt -from utils.normalize import convert_dataset_types +from utils.normalize import convert_dataset_types,convert_type +from utils.utils import serialize_table_to_tapex_format from check_result import extract_code_from_response - +from utils.type_check import analyze_dataset_parallel train = pd.read_csv('../../datasets/WikiTableQuestions/training.tsv', sep='\t') -# Выбираем случайные индексы -random.seed(101) -random_indices = random.sample(range(len(train)), 20) - def run_code_and_get_result(code_str, df): """Выполняет код на нормализованной таблице и возвращает (результат, ошибка)""" df_norm = convert_dataset_types(df) clean_code = extract_code_from_response(code_str) try: - result = eval(clean_code, {'df': df_norm, 'pd': pd, 'np': np}) + result = eval(clean_code, {'df': df_norm, 'pd': pd, 'np': np,'convert_type':convert_type}) return result, None except Exception as e: return None, str(e) async def main(): - results = [] - syntax_errors = 0 - correct_answers = 0 - total = len(random_indices) - - for i, idx in enumerate(random_indices, 1): - print(f"\n{'='*80}") - print(f"--- Test {i}/{total} (train index: {idx}) ---") - - csv = pd.read_csv('../../datasets/WikiTableQuestions/' + train.context.iloc[idx]) - question = train.utterance.iloc[idx] - targetValue = train.targetValue.iloc[idx] - - # Нормализуем таблицу один раз - df_norm = convert_dataset_types(csv) - - # Передаём в get_pandas нормализованную таблицу - code = await get_pandas(question, df_norm) - print(f"Generated code: {code}") - - # Выполняем код на той же нормализованной таблице - result_val, error = run_code_and_get_result(code, df_norm) - print(f"Execution result: {result_val}") - if error: - print(f"Execution error: {error}") - syntax_errors += 1 - - 
print(f"Expected value: {targetValue}") - - # Выводим информацию о таблицах (оригинальной и нормализованной) - print("\n--- Original table (first 5 rows) ---") - print(csv.head().to_string()) - print("\n--- Normalized table (first 5 rows) ---") - print(df_norm.head().to_string()) - print("\n--- Column types (original) ---") - print(csv.dtypes.to_string()) - print("\n--- Column types (normalized) ---") - print(df_norm.dtypes.to_string()) - - # Проверяем совпадение - match = evaluate_code(code, df_norm, targetValue) - print(f"Match: {match}") - - if match: - correct_answers += 1 - results.append(match) - - # Статистика - print("\n" + "="*80) - print("СТАТИСТИКА") - print("="*80) - print(f"Всего запусков: {total}") - print(f"С синтаксической ошибкой: {syntax_errors}") - print(f"Без синтаксической ошибки: {total - syntax_errors}") - print(f"Правильных ответов: {correct_answers}") - print(f"Точность (от всех запусков): {correct_answers / total * 100:.1f}%") - print(f"Точность (только без ошибок): {correct_answers / (total - syntax_errors) * 100:.1f}%" if syntax_errors < total else "Точность (только без ошибок): N/A") - print("="*80) - - return results + json_result = [] + + for i in range(30,40): + df = pd.read_csv('../../datasets/WikiTableQuestions/' + train.iloc[i].context) + targetValue = train.iloc[i].targetValue + question = train.iloc[i].utterance + + norm_df = convert_dataset_types(df) + ser_df = serialize_table_to_tapex_format(norm_df) + df_types = analyze_dataset_parallel(df) + columns_types = {col: types[0] for col, types in df_types.items()} + + code = await get_pandas(question, ser_df, columns_types) + result = run_code_and_get_result(code, df) + error = None + if result[0] is not None: + result = result[0] + else: + error = result[1] + result = None + + json_result.append({ + 'id': i, + 'table': train.iloc[i].context, + 'code': code, + 'result': result, + 'error': error + }) + + # print(system_prompt) + # print('table ',ser_df) + # # print('coll types 
',columns_types) + # print('question ',question) + # print(run_code_and_get_result(input(),df)) + # print(targetValue) + # print() + # print() + # print() + # print() + # print() + # print() + return json_result if __name__ == "__main__": - results = asyncio.run(main()) - print(f"\nFinal results: {results}") \ No newline at end of file + result = asyncio.run(main()) # ← сохраняем результат + for i in result: + print(i) diff --git a/utils/normalize.py b/utils/normalize.py index 79155de..26e42e4 100644 --- a/utils/normalize.py +++ b/utils/normalize.py @@ -3,18 +3,7 @@ sys.path.insert(0, '/home/master/PycharmProjects/semtab_serializer') -from utils.type_check import analyze_dataset_parallel, clean_value, extract_number_string - - -def clean_column_name(col): - """Заменяет спецсимволы в имени колонки на _""" - # Заменяем переводы строк, скобки, пробелы, дефисы и другие спецсимволы на _ - col = re.sub(r'[\n\r\t\(\)\[\]\{\}\-\s]+', '_', str(col)) - # Убираем дублирующиеся подчеркивания - col = re.sub(r'_+', '_', col) - # Убираем подчеркивание в начале и конце - col = col.strip('_') - return col +from utils.type_check import analyze_dataset_parallel, clean_value, extract_number_string,check_type_comprehensive def convert_value(v, t, keep_original_on_error=True): @@ -104,15 +93,32 @@ def convert_dataset_types(df, max_workers=None): info = analyze_dataset_parallel(df, max_workers) res = df.copy() - # Переименовываем колонки, заменяя спецсимволы на _ - res.columns = [clean_column_name(col) for col in res.columns] - for col, (t, _) in info.items(): - # Находим новое имя колонки после переименования - new_col = clean_column_name(col) - if t != 'None' and new_col in res.columns: + if t != 'None' and col in res.columns: try: - res[new_col] = convert_col(df[col], t) + res[col] = convert_col(df[col], t) except: pass - return res \ No newline at end of file + return res + +def convert_type(value): + type = check_type_comprehensive(value)[0] + res = value.copy() + + try: + res = 
convert_value(res,type) + except: + pass + return res +# train = pd.read_csv('../datasets/WikiTableQuestions/training.tsv', sep='\t') +# +# df = pd.read_csv('../datasets/WikiTableQuestions/' + train.iloc[1].context) +# print(df) +# print(analyze_dataset_parallel(df)) +# print('type',type(df.Year.iloc[0])) +# print(df.Year.sum()) +# +# norm_df = convert_dataset_types(df) +# print(norm_df.Year.sum()) +# print(type(df.Year.iloc[0])) +# print(type(convert_type(df.Year.iloc[0]))) \ No newline at end of file diff --git a/utils/type_check.py b/utils/type_check.py index be379cd..de5beb6 100644 --- a/utils/type_check.py +++ b/utils/type_check.py @@ -271,4 +271,5 @@ def analyze_dataset_parallel(dataset: pd.DataFrame, max_workers: Optional[int] = column_name, column_result = future.result() results[column_name] = column_result - return results \ No newline at end of file + return results + From e465acec6c8c10f8afb1068aeeb0b52163d3835a Mon Sep 17 00:00:00 2001 From: master Date: Mon, 25 May 2026 11:12:07 +0800 Subject: [PATCH 07/10] =?UTF-8?q?=D0=9F=D0=B5=D1=80=D0=B2=D1=8B=D0=B9=20?= =?UTF-8?q?=D0=B7=D0=B0=D0=BF=D1=83=D1=81=D0=BA=20=D0=B4=D0=BB=D1=8F=20?= =?UTF-8?q?=D0=B3=D0=B5=D0=BD=D0=B5=D1=80=D0=B0=D1=86=D0=B8=D0=B8=20=D0=BE?= =?UTF-8?q?=D0=B1=D1=83=D1=87=D0=B0=D1=8E=D1=89=D0=B5=D0=B3=D0=BE=20=D0=BD?= =?UTF-8?q?=D0=B0=D0=B1=D0=BE=D1=80=D0=B0.=2011515=20=D1=83=D1=81=D0=BF?= =?UTF-8?q?=D0=B5=D1=88=D0=BD=D1=8B=D1=85=20=D0=B7=D0=B0=D0=BF=D1=83=D1=81?= =?UTF-8?q?=D0=BA=D0=BE=D0=B2=20=D0=BA=D0=BE=D0=B4=D0=B0.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- WTQ/Qwen/check_result.py | 42 +--- WTQ/Qwen/config.py | 16 ++ WTQ/Qwen/get_pandas_code.py | 4 +- WTQ/Qwen/main.py | 242 ++++++++++++++++++---- WTQ/Qwen/train_results/show_statistics.py | 43 ++++ utils/normalize.py | 20 +- 6 files changed, 287 insertions(+), 80 deletions(-) create mode 100644 WTQ/Qwen/train_results/show_statistics.py diff --git a/WTQ/Qwen/check_result.py 
b/WTQ/Qwen/check_result.py index cf3c3aa..d6eb5a5 100644 --- a/WTQ/Qwen/check_result.py +++ b/WTQ/Qwen/check_result.py @@ -1,52 +1,30 @@ -from utils.normalize import convert_dataset_types +from utils.normalize import convert_dataset_types, convert_type import pandas as pd import numpy as np import re import json - -import re -import json - def extract_code_from_response(response): if not response: return response - # 1. Поиск JSON: от первой { до последней } - start = response.find('{') - end = response.rfind('}') - if start != -1 and end != -1: - # Пробуем отрезать от 0 до 5 лишних символов в конце - for cut in range(0, 6): - candidate = response[start:end+1-cut] - try: - data = json.loads(candidate) - if 'PANDA' in data: - code = data['PANDA'] - # Очистка кода от мусора в конце - code = re.sub(r'[\]\}]+$', '', code) # удалить ] } в конце - # Если код обёрнут в квадратные скобки, снимаем их - if code.startswith('[') and code.endswith(']'): - code = code[1:-1].strip() - return code - except: - continue + # Самое простое решение: ищем между "PANDA": " и следующей кавычкой перед } + pattern = r'"PANDA"\s*:\s*"([^"]+)"' + match = re.search(pattern, response) - # 2. 
Fallback: регулярное выражение, захватывающее всё до последней кавычки перед } или ] - match = re.search(r'"PANDA"\s*:\s*"([^"]*)"\s*[\}\]\]]', response, re.DOTALL) if match: - code = match.group(1).strip() - code = re.sub(r'[\]\}]+$', '', code) + code = match.group(1) + # Заменяем экранированные кавычки на обычные + code = code.replace('\\"', '"') + code = code.replace("\\'", "'") return code return response -def normalize_value(value): - df_temp = pd.DataFrame({'col': [value]}) - df_norm = convert_dataset_types(df_temp) - return df_norm['col'].iloc[0] +def normalize_value(value): + return convert_type(value) def evaluate_code(code_str, df, expected): # Нормализуем таблицу diff --git a/WTQ/Qwen/config.py b/WTQ/Qwen/config.py index 8d3ffd2..a0392a7 100644 --- a/WTQ/Qwen/config.py +++ b/WTQ/Qwen/config.py @@ -14,6 +14,22 @@ Filtering by date: df[df['Date'] > convert_type('2000-01-01')] Arithmetic with constants: convert_type('1000') + df['Bonus'] String constant comparisons do NOT require convert_type: df[df['Team'] == 'Crettyard'] +Always use find_word(value: str)-> str when filtering by string entities mentioned in the question — such as names of companies, people, cities, categories, etc. — regardless of whether typos or variations are suspected. +Examples for find_word(): +# Question: "Who scored the most points for Manchester United?" +df[df["Team"] == find_word("Manchester United")]["Points"].max() + +# Question: "How many players from Belgrade are in the list?" +df[df["City"] == find_word("Belgrade")].shape[0] + +# Question: "What category does 'Grand Slam' belong to?" +df[df["Category"] == find_word("Grand Slam")]["Type"].iloc[0] + +# Question: "Find the record for Elliot Benyon" +df[df["Name"] == find_word("Elliot Benyon")] + +# Question: "Which club signed a player from Australia?" +df[df["Signed from"] == find_word("Australia")] Examples: Question: "Which team scored the most points?" 
diff --git a/WTQ/Qwen/get_pandas_code.py b/WTQ/Qwen/get_pandas_code.py index cc6c658..e05759b 100644 --- a/WTQ/Qwen/get_pandas_code.py +++ b/WTQ/Qwen/get_pandas_code.py @@ -22,9 +22,9 @@ async def get_pandas(question: str, ser_tbl: str, tbl_types: dict, max_rows=20): success, responses = await send_message_async( messages=[system_message, user_message ], - base_url="http://192.168.19.127:9886/v1", + base_url="http://127.0.0.1:9123/v1", api_key='EMPTY', - model_name='Qwen/Qwen3-4B-Instruct-2507', + model_name='Qwen/Qwen3-Coder-Next', temperature=0.3, ) diff --git a/WTQ/Qwen/main.py b/WTQ/Qwen/main.py index f289399..607af87 100644 --- a/WTQ/Qwen/main.py +++ b/WTQ/Qwen/main.py @@ -2,74 +2,232 @@ import numpy as np import asyncio import json +import logging +import os +from datetime import datetime +from tqdm.asyncio import tqdm from get_pandas_code import get_pandas from check_result import evaluate_code from config import system_prompt -from utils.normalize import convert_dataset_types,convert_type +from utils.normalize import convert_dataset_types, convert_type, find_word from utils.utils import serialize_table_to_tapex_format from check_result import extract_code_from_response from utils.type_check import analyze_dataset_parallel +# Создаём папку для результатов +RESULTS_DIR = 'train_results' +os.makedirs(RESULTS_DIR, exist_ok=True) + +# Настройка логирования в файл +timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") +log_filename = os.path.join(RESULTS_DIR, f'execution_log_{timestamp}.log') +results_filename = os.path.join(RESULTS_DIR, f'results_{timestamp}.json') +checkpoint_filename = os.path.join(RESULTS_DIR, f'checkpoint_{timestamp}.json') + +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(levelname)s - %(message)s', + handlers=[ + logging.FileHandler(log_filename, encoding='utf-8'), + logging.StreamHandler() + ] +) +logger = logging.getLogger(__name__) + train = pd.read_csv('../../datasets/WikiTableQuestions/training.tsv', sep='\t') 
+def save_checkpoint(results, filename): + """Сохраняет промежуточные результаты""" + with open(filename, 'w', encoding='utf-8') as f: + json.dump(results, f, ensure_ascii=False, indent=2, default=str) + logger.info(f"Checkpoint saved: {len(results)} results") + + +def load_checkpoint(filename): + """Загружает сохранённые результаты""" + if os.path.exists(filename): + with open(filename, 'r', encoding='utf-8') as f: + return json.load(f) + return [] + + +def safe_convert_type(value): + """Безопасная обёртка для convert_type""" + try: + return convert_type(value) + except Exception as e: + logger.warning(f"convert_type failed for '{value}': {e}") + return value + + def run_code_and_get_result(code_str, df): """Выполняет код на нормализованной таблице и возвращает (результат, ошибка)""" - df_norm = convert_dataset_types(df) + try: + df_norm = convert_dataset_types(df) + except Exception as e: + return None, f"convert_dataset_types error: {e}" + clean_code = extract_code_from_response(code_str) + if clean_code is None: + return None, "extract_code_from_response returned None" + try: - result = eval(clean_code, {'df': df_norm, 'pd': pd, 'np': np,'convert_type':convert_type}) + result = eval(clean_code, { + 'df': df_norm, + 'pd': pd, + 'np': np, + 'convert_type': convert_type, + 'find_word': find_word + }) return result, None except Exception as e: return None, str(e) -async def main(): - json_result = [] +async def process_single_row(i, train): + """Обработка одной строки""" + try: + # 1. 
Чтение TSV с обработкой ошибок + file_path = train.iloc[i].context + tsv_path = file_path.replace('.csv', '.tsv').replace('/csv/', '/tsv/') + full_path = '../../datasets/WikiTableQuestions/' + tsv_path + + try: + df = pd.read_csv(full_path, sep='\t') + except Exception as e: + logger.error(f"Failed to read {full_path}: {e}") + return { + 'id': i, + 'table': file_path, + 'code': None, + 'result': None, + 'error': f"CSV/TSV read error: {e}", + 'is_correct': False, + 'target_value': train.iloc[i].targetValue + } - for i in range(30,40): - df = pd.read_csv('../../datasets/WikiTableQuestions/' + train.iloc[i].context) targetValue = train.iloc[i].targetValue question = train.iloc[i].utterance - norm_df = convert_dataset_types(df) - ser_df = serialize_table_to_tapex_format(norm_df) - df_types = analyze_dataset_parallel(df) - columns_types = {col: types[0] for col, types in df_types.items()} - - code = await get_pandas(question, ser_df, columns_types) - result = run_code_and_get_result(code, df) - error = None - if result[0] is not None: - result = result[0] - else: - error = result[1] - result = None - - json_result.append({ + # 2. Нормализация и анализ типов с защитой + try: + norm_df = convert_dataset_types(df) + except Exception as e: + logger.error(f"convert_dataset_types failed for row {i}: {e}") + norm_df = df.copy() + + try: + ser_df = serialize_table_to_tapex_format(norm_df) + except Exception as e: + logger.error(f"serialize_table_to_tapex_format failed for row {i}: {e}") + ser_df = str(norm_df.head()) + + try: + df_types = analyze_dataset_parallel(df) + columns_types = {col: types[0] for col, types in df_types.items()} + except Exception as e: + logger.warning(f"analyze_dataset_parallel failed for row {i}: {e}") + columns_types = {} + + # 3. 
Получение кода от модели + try: + code = await get_pandas(question, ser_df, columns_types) + except Exception as e: + logger.error(f"get_pandas failed for row {i}: {e}") + return { + 'id': i, + 'table': file_path, + 'code': None, + 'result': None, + 'error': f"get_pandas error: {e}", + 'is_correct': False, + 'target_value': targetValue + } + + # 4. Выполнение кода + result, error = run_code_and_get_result(code, df) + + # 5. Сравнение с ожидаемым значением + try: + norm_target = safe_convert_type(targetValue) + is_correct = (result == norm_target) if error is None else False + except Exception as e: + logger.warning(f"Comparison failed for row {i}: {e}") + is_correct = False + + return { 'id': i, - 'table': train.iloc[i].context, + 'table': file_path, 'code': code, - 'result': result, - 'error': error - }) + 'result': str(result) if result is not None else None, + 'error': error, + 'is_correct': bool(is_correct), + 'target_value': targetValue + } + + except Exception as e: + logger.exception(f"Unexpected error in process_single_row for id {i}: {e}") + return { + 'id': i, + 'table': train.iloc[i].context if i < len(train) else 'unknown', + 'code': None, + 'result': None, + 'error': f"Unexpected: {e}", + 'is_correct': False, + 'target_value': train.iloc[i].targetValue if i < len(train) else None + } + + +async def main(): + # Генерация ID для обработки + ids = list(range(len(train))) + + # Загружаем уже обработанные ID из чекпоинта + existing_results = load_checkpoint(checkpoint_filename) + processed_ids = {r['id'] for r in existing_results} - # print(system_prompt) - # print('table ',ser_df) - # # print('coll types ',columns_types) - # print('question ',question) - # print(run_code_and_get_result(input(),df)) - # print(targetValue) - # print() - # print() - # print() - # print() - # print() - # print() - return json_result + # Фильтруем необработанные + remaining_ids = [i for i in ids if i not in processed_ids] + + logger.info(f"Total: {len(ids)}, Already processed: 
{len(processed_ids)}, Remaining: {len(remaining_ids)}") + logger.info(f"Results directory: {os.path.abspath(RESULTS_DIR)}") + + results = existing_results.copy() + + # Обрабатываем оставшиеся последовательно (для сохранения каждые 100) + if remaining_ids: + with tqdm(total=len(remaining_ids), desc="Processing WikiTableQuestions") as pbar: + for idx, i in enumerate(remaining_ids, 1): + result = await process_single_row(i, train) + results.append(result) + + # Сохраняем чекпоинт каждые 100 строк + if idx % 100 == 0: + save_checkpoint(results, checkpoint_filename) + + pbar.update(1) + + # Финальное сохранение + save_checkpoint(results, results_filename) + + # Подсчёт правильных ответов + total_correct = sum(1 for r in results if r['is_correct']) + + logger.info(f"Final results saved to {results_filename}") + logger.info(f"Logs saved to {log_filename}") + + return results, total_correct if __name__ == "__main__": - result = asyncio.run(main()) # ← сохраняем результат - for i in result: - print(i) + result, count = asyncio.run(main()) + + # Вывод результатов в консоль + print("\n" + "=" * 50) + for item in result: + print(f"id: {item['id']}, correct: {item['is_correct']}, error: {item['error']}") + + print(f"\nTotal correct: {count} out of {len(result)}") + print(f"Accuracy: {count / len(result) * 100:.2f}%") + print(f"Results saved in folder: {RESULTS_DIR}/") \ No newline at end of file diff --git a/WTQ/Qwen/train_results/show_statistics.py b/WTQ/Qwen/train_results/show_statistics.py new file mode 100644 index 0000000..d3915c5 --- /dev/null +++ b/WTQ/Qwen/train_results/show_statistics.py @@ -0,0 +1,43 @@ +import pandas as pd +import json +import matplotlib.pyplot +import json +# +# with open('results_20260524_154618.json') as f: +# results = json.load(f) +# +# unique_error = {} +# +# for i in results: +# if i['error'] not in unique_error: +# unique_error[i['error']] = 1 +# else: +# unique_error[i['error']] += 1 +# +# # Сохраняем в JSON файл +# with 
open('unique_errors.json', 'w', encoding='utf-8') as f: +# json.dump(unique_error, f, ensure_ascii=False, indent=4) +# +# print("Сохранено в unique_errors.json") + + +import json + +# Загрузка данных +with open('unique_errors.json', 'r') as f: + errors = json.load(f) + +# Сортировка по убыванию (по количеству) +sorted_errors = dict(sorted(errors.items(), key=lambda x: x[1], reverse=True)) + +# Вывод топ-20 +for i, (error, count) in enumerate(list(sorted_errors.items())[:20], 1): + print(f"{i:2}. {count:5} x {error[:80]}{'...' if len(error) > 80 else ''}") + +# Сохранение в файл +with open('sorted_errors.json', 'w', encoding='utf-8') as f: + json.dump(sorted_errors, f, ensure_ascii=False, indent=4) + +print(f"\n✅ Сохранено в sorted_errors.json") +print(f"Всего типов ошибок: {len(sorted_errors)}") +print(f"null (успешно): {sorted_errors.get('null', 0)}") \ No newline at end of file diff --git a/utils/normalize.py b/utils/normalize.py index 26e42e4..7f98a6b 100644 --- a/utils/normalize.py +++ b/utils/normalize.py @@ -18,9 +18,9 @@ def convert_value(v, t, keep_original_on_error=True): return original if keep_original_on_error else pd.NA if t == 'bool': - if re.match(r'^(true|yes|да|истина|1|\+)$', s, re.I): + if re.match(r'^(true|yes|да|истина|\+)$', s, re.I): return True - if re.match(r'^(false|no|нет|ложь|0|-|\[ \])$', s, re.I): + if re.match(r'^(false|no|нет|ложь|-|\[ \])$', s, re.I): return False return original if keep_original_on_error else pd.NA @@ -101,15 +101,27 @@ def convert_dataset_types(df, max_workers=None): pass return res + def convert_type(value): + # Копируем только если это необходимо + try: + res = value.copy() + except AttributeError: + res = value + type = check_type_comprehensive(value)[0] - res = value.copy() try: - res = convert_value(res,type) + res = convert_value(res, type) except: pass + return res + +def find_word(value: str)-> str: + return value + + # train = pd.read_csv('../datasets/WikiTableQuestions/training.tsv', sep='\t') # # df = 
pd.read_csv('../datasets/WikiTableQuestions/' + train.iloc[1].context) From 4dbcb1b6783cc79b266bff04ef7b8a43491206a8 Mon Sep 17 00:00:00 2001 From: AldarArmaev Date: Fri, 22 May 2026 00:19:07 +0800 Subject: [PATCH 08/10] =?UTF-8?q?=D0=94=D0=BE=D0=B1=D0=B0=D0=B2=D0=BB?= =?UTF-8?q?=D0=B5=D0=BD=20=D0=BF=D1=80=D0=B8=D0=BC=D0=B5=D1=80=20=D1=80?= =?UTF-8?q?=D0=B0=D0=B1=D0=BE=D1=82=D1=8B=20Qwen=207b=20=D0=B4=D0=BB=D1=8F?= =?UTF-8?q?=20=D0=B2=D0=BE=D0=BF=D1=80=D0=BE=D1=81-=D0=BE=D1=82=D0=B2?= =?UTF-8?q?=D0=B5=D1=82=20=D0=BF=D0=BE=20=D1=82=D0=B0=D0=B1=D0=BB=D0=B8?= =?UTF-8?q?=D1=86=D0=B5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- WTQ/Qwen/Untitled.ipynb | 1275 +++++++++++++++++++++++++++++++++++++++ WTQ/Qwen/config.py | 57 ++ WTQ/Qwen/server.py | 75 +++ 3 files changed, 1407 insertions(+) create mode 100644 WTQ/Qwen/Untitled.ipynb create mode 100644 WTQ/Qwen/config.py create mode 100644 WTQ/Qwen/server.py diff --git a/WTQ/Qwen/Untitled.ipynb b/WTQ/Qwen/Untitled.ipynb new file mode 100644 index 0000000..a8b094e --- /dev/null +++ b/WTQ/Qwen/Untitled.ipynb @@ -0,0 +1,1275 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 5, + "id": "b824dfad-5524-47e2-b804-6f71e0507f49", + "metadata": {}, + "outputs": [], + "source": [ + "import openai\n", + "import time\n", + "import re\n", + "import pandas as pd\n", + "\n", + "def send_message(\n", + " message, \n", + " max_tokens=1000, # Увеличил дефолт, так как таблицы и логи могут быть длинными\n", + " top_p=0.9, \n", + " temperature=0.0, # Для задач интерпретатора строго 0.0 по умолчанию\n", + " server_url=\"http://127.0.0.1:9092/v1\", \n", + " api_key=\"dummy\",\n", + " model_name='Qwen2.5-Coder-7B-Instruct', \n", + " stop=None, # Для обычных ответов стоп-слова лучше сделать опциональными\n", + " retries=3 # Количество попыток при падении сервера\n", + "):\n", + " # Инициализируем клиент OpenAI\n", + " client = openai.OpenAI(base_url=server_url, api_key=api_key)\n", + 
" \n", + " model_input = [\n", + " { 'role': 'user', 'content': message}\n", + " ]\n", + " \n", + " try:\n", + " print(f\"Generating content with model: {model_name} (Temp: {temperature})\")\n", + " \n", + " response = client.chat.completions.create(\n", + " model=model_name,\n", + " messages=model_input,\n", + " temperature=temperature,\n", + " max_tokens=max_tokens,\n", + " top_p=top_p,\n", + " stop=stop\n", + " )\n", + " \n", + " return True, response.choices[0].message.content\n", + "\n", + " except Exception as e:\n", + " print(f\"\\n[ERROR] Failed to call LLM: {e}\")\n", + " \n", + " # Разбираем ответ сервера, если он есть\n", + " if hasattr(e, 'response') and e.response is not None:\n", + " try:\n", + " error_info = e.response.json() \n", + " code_value = error_info.get('error', {}).get('code', 'unknown_error')\n", + " print(f\"Код ошибки от сервера: {code_value}\")\n", + " except Exception:\n", + " print(f\"Сырой ответ сервера об ошибке: {e.response.text}\")\n", + " \n", + " # Если попытки еще остались — пробуем снова\n", + " if retries > 0:\n", + " print(f\"Waiting 6 seconds before retry... (Remaining retries: {retries})\")\n", + " time.sleep(6)\n", + " return send_message(\n", + " message=message, max_tokens=max_tokens, top_p=top_p, \n", + " temperature=temperature, server_url=server_url, api_key=api_key, \n", + " model_name=model_name, stop=stop, retries=retries-1\n", + " )\n", + " else:\n", + " print(\"All retries failed. 
Skipping.\")\n", + " return False, None\n", + "\n", + "def parse_panda_code(input_string):\n", + " # Сначала попробуем найти JSON объект с PANDA\n", + " json_pattern = r'\\{[^{}]*(?:CORRECT PANDA|PANDA)\":\\s*(.+?)(?:\\n|$)?\\}'\n", + " json_match = re.search(json_pattern, input_string, re.DOTALL)\n", + " code = None\n", + " pattern = r'\"(?:CORRECT PANDA|PANDA)\":\\s*(.+?)(?:\\n|$)'\n", + " if json_match:\n", + " code = json_match.group(1).strip()\n", + " else:\n", + " match = re.search(pattern, input_string, re.DOTALL)\n", + " if match:\n", + " code = match.group(1).strip()\n", + "\n", + " if code != None:\n", + " if code.startswith('\"') and code.endswith('\"'):\n", + " code = code[1:-1]\n", + " elif code.startswith(\"'\") and code.endswith(\"'\"):\n", + " code = code[1:-1]\n", + " \n", + " return code\n", + " \n", + " return \"\"\n" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "b78aedf9-c11e-4004-b00f-d661578c3400", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idutterancecontexttargetValue
0nt-0what was the last year where this team was a p...csv/204-csv/590.csv2004
1nt-1in what city did piotr's last 1st place finish...csv/204-csv/622.csvBangkok, Thailand
2nt-2which team won previous to crettyard?csv/204-csv/772.csvWolfe Tones
3nt-3how many more passengers flew to los angeles t...csv/203-csv/515.csv12,467
4nt-4who was the opponent in the first game of the ...csv/204-csv/495.csvDerby County
...............
14144nt-14147who came in last?csv/204-csv/433.csvJavier Díaz
14145nt-14148which album has the highest number of sales bu...csv/204-csv/949.csvVain elämää
14146nt-14149japan finished below how many countries?csv/204-csv/183.csv0
14147nt-14150how many districts have a population density o...csv/204-csv/739.csv31
14148nt-14151what entrant has the most drivers?csv/203-csv/670.csvOwen Racing Organisation
\n", + "

14149 rows × 4 columns

\n", + "
" + ], + "text/plain": [ + " id utterance \\\n", + "0 nt-0 what was the last year where this team was a p... \n", + "1 nt-1 in what city did piotr's last 1st place finish... \n", + "2 nt-2 which team won previous to crettyard? \n", + "3 nt-3 how many more passengers flew to los angeles t... \n", + "4 nt-4 who was the opponent in the first game of the ... \n", + "... ... ... \n", + "14144 nt-14147 who came in last? \n", + "14145 nt-14148 which album has the highest number of sales bu... \n", + "14146 nt-14149 japan finished below how many countries? \n", + "14147 nt-14150 how many districts have a population density o... \n", + "14148 nt-14151 what entrant has the most drivers? \n", + "\n", + " context targetValue \n", + "0 csv/204-csv/590.csv 2004 \n", + "1 csv/204-csv/622.csv Bangkok, Thailand \n", + "2 csv/204-csv/772.csv Wolfe Tones \n", + "3 csv/203-csv/515.csv 12,467 \n", + "4 csv/204-csv/495.csv Derby County \n", + "... ... ... \n", + "14144 csv/204-csv/433.csv Javier Díaz \n", + "14145 csv/204-csv/949.csv Vain elämää \n", + "14146 csv/204-csv/183.csv 0 \n", + "14147 csv/204-csv/739.csv 31 \n", + "14148 csv/203-csv/670.csv Owen Racing Organisation \n", + "\n", + "[14149 rows x 4 columns]" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "train = pd.read_csv('data/data/training.tsv', sep = '\\t')\n", + "train" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "8e85b283-9458-4fcf-891a-4a34e1f2697d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
YearDivisionLeagueRegular SeasonPlayoffsOpen CupAvg. Attendance
020012USL A-League4th, WesternQuarterfinalsDid not qualify7,169
120022USL A-League2nd, Pacific1st RoundDid not qualify6,260
220032USL A-League3rd, PacificDid not qualifyDid not qualify5,871
320042USL A-League1st, WesternQuarterfinals4th Round5,628
420052USL First Division5thQuarterfinals4th Round6,028
520062USL First Division11thDid not qualify3rd Round5,575
620072USL First Division2ndSemifinals2nd Round6,851
720082USL First Division11thDid not qualify1st Round8,567
820092USL First Division1stSemifinals3rd Round9,734
920102USSF D-2 Pro League3rd, USL (3rd)Quarterfinals3rd Round10,727
\n", + "
" + ], + "text/plain": [ + " Year Division League Regular Season Playoffs \\\n", + "0 2001 2 USL A-League 4th, Western Quarterfinals \n", + "1 2002 2 USL A-League 2nd, Pacific 1st Round \n", + "2 2003 2 USL A-League 3rd, Pacific Did not qualify \n", + "3 2004 2 USL A-League 1st, Western Quarterfinals \n", + "4 2005 2 USL First Division 5th Quarterfinals \n", + "5 2006 2 USL First Division 11th Did not qualify \n", + "6 2007 2 USL First Division 2nd Semifinals \n", + "7 2008 2 USL First Division 11th Did not qualify \n", + "8 2009 2 USL First Division 1st Semifinals \n", + "9 2010 2 USSF D-2 Pro League 3rd, USL (3rd) Quarterfinals \n", + "\n", + " Open Cup Avg. Attendance \n", + "0 Did not qualify 7,169 \n", + "1 Did not qualify 6,260 \n", + "2 Did not qualify 5,871 \n", + "3 4th Round 5,628 \n", + "4 4th Round 6,028 \n", + "5 3rd Round 5,575 \n", + "6 2nd Round 6,851 \n", + "7 1st Round 8,567 \n", + "8 3rd Round 9,734 \n", + "9 3rd Round 10,727 " + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "csv = pd.read_csv('data/'+ train.context[0])\n", + "csv" + ] + }, + { + "cell_type": "code", + "execution_count": 124, + "id": "651a5fb2-4dda-4544-8282-76455880387f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Year Division League Regular Season Playoffs \\\n", + "0 2001 2 USL A-League 4th, Western Quarterfinals \n", + "1 2002 2 USL A-League 2nd, Pacific 1st Round \n", + "2 2003 2 USL A-League 3rd, Pacific Did not qualify \n", + "3 2004 2 USL A-League 1st, Western Quarterfinals \n", + "4 2005 2 USL First Division 5th Quarterfinals \n", + "5 2006 2 USL First Division 11th Did not qualify \n", + "6 2007 2 USL First Division 2nd Semifinals \n", + "7 2008 2 USL First Division 11th Did not qualify \n", + "8 2009 2 USL First Division 1st Semifinals \n", + "9 2010 2 USSF D-2 Pro League 3rd, USL (3rd) Quarterfinals \n", + "\n", + " Open Cup Avg. 
Attendance \n", + "0 Did not qualify 7,169 \n", + "1 Did not qualify 6,260 \n", + "2 Did not qualify 5,871 \n", + "3 4th Round 5,628 \n", + "4 4th Round 6,028 \n", + "5 3rd Round 5,575 \n", + "6 2nd Round 6,851 \n", + "7 1st Round 8,567 \n", + "8 3rd Round 9,734 \n", + "9 3rd Round 10,727 \n" + ] + } + ], + "source": [ + "df_table = pd.read_csv('data/' + train.context.iloc[i])\n", + "print(df_table)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "id": "1ce82a05-c0dd-438b-83df-dc619d3f859b", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Отправляем корректный запрос на локальный Qwen...\n", + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "True ```json\n", + "{\n", + " \"PANDA\": \"df.loc[df['Division'] == 2 & df['League'] == 'USL A-League', 'Year'].max()\"\n", + "}\n", + "```\n", + "\n", + "--- Ответ от модели ---\n", + "```json\n", + "{\n", + " \"PANDA\": \"df.loc[df['Division'] == 2 & df['League'] == 'USL A-League', 'Year'].max()\"\n", + "}\n", + "```\n", + "You are a Python expert specializing in pandas. You are given a question and a table. Your task is to translate the given natural language question into\n", + "a single-line pandas expression. This expression, which acts like a query, must\n", + "be valid and executable so that running the pandas expression will output the\n", + "answer to the question. Consider the following:\n", + "1. The table is represented as a pandas DataFrame named df.\n", + "2. Do not include explanations, comments, or multiline outputs.\n", + "3. Ensure the output is concise, correct, and when run, it outputs the correct\n", + "given answer, and strictly follows the Json format: {{\"PANDA\": \"\"}}\n", + "\n", + "### Table schema\n", + " Year Division League Regular Season Playoffs Open Cup Avg. 
Attendance\n", + " 2001 2 USL A-League 4th, Western Quarterfinals Did not qualify 7,169\n", + " 2002 2 USL A-League 2nd, Pacific 1st Round Did not qualify 6,260\n", + " 2003 2 USL A-League 3rd, Pacific Did not qualify Did not qualify 5,871\n", + " 2004 2 USL A-League 1st, Western Quarterfinals 4th Round 5,628\n", + " 2005 2 USL First Division 5th Quarterfinals 4th Round 6,028\n", + " 2006 2 USL First Division 11th Did not qualify 3rd Round 5,575\n", + " 2007 2 USL First Division 2nd Semifinals 2nd Round 6,851\n", + " 2008 2 USL First Division 11th Did not qualify 1st Round 8,567\n", + " 2009 2 USL First Division 1st Semifinals 3rd Round 9,734\n", + " 2010 2 USSF D-2 Pro League 3rd, USL (3rd) Quarterfinals 3rd Round 10,727\n", + "\n", + "### Query\n", + "what was the last year where this team was a part of the usl a-league?\n", + "\n", + "\n", + "\n", + "--- Извлеченный код Pandas ---\n", + "[df.loc[df['Division'] == 2 & df['League'] == 'USL A-League', 'Year'].max()]\n" + ] + } + ], + "source": [ + "import config\n", + "import importlib\n", + "import pandas as pd\n", + "\n", + "# 0. Принудительно перезагружаем конфиг\n", + "importlib.reload(config) \n", + "\n", + "# Предположим, мы берем первый пример (i = 0)\n", + "i = 0\n", + "\n", + "# ИСПРАВЛЕНИЕ: Убедитесь, что переменная `train` у вас определена выше в коде (например, train = pd.read_csv('...'))\n", + "\n", + "# 1. Достаем чистый текст вопроса\n", + "query_text = train.utterance.iloc[i]\n", + "\n", + "# 2. Читаем таблицу и превращаем ЕЁ В ТЕКСТ (в формат Markdown)\n", + "table_text = pd.read_csv('data/' + train.context.iloc[i]).to_string(index=False)\n", + "#table_text = df_table.to_markdown(index=False) \n", + "\n", + "# 3. 
Безопасная подстановка данных через .replace() вместо .format()\n", + "# Это защитит от ошибок, если в промпте есть другие фигурные скобки {}\n", + "full_message = config.system_prompt.replace(\"{table}\", table_text).replace(\"{query}\", query_text)\n", + "\n", + "print(\"Отправляем корректный запрос на локальный Qwen...\")\n", + "\n", + "# ИСПРАВЛЕНИЕ: Передаем именно full_message\n", + "success, response = send_message(\n", + " message=full_message,\n", + ")\n", + "\n", + " \n", + "print(success, response)\n", + "if success:\n", + " print(\"\\n--- Ответ от модели ---\")\n", + " print(response)\n", + " print(full_message)\n", + " parsed_code = parse_panda_code(response)\n", + " print(\"\\n--- Извлеченный код Pandas ---\")\n", + " print(f\"[{parsed_code}]\")\n", + "else:\n", + " print(\"Не удалось получить ответ от сервера.\")" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "id": "2159706c-e803-4a50-af6c-00a721ae3508", + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "import re\n", + "\n", + "def exec_pandas(json_str):\n", + " # Удаляем markdown-обертку ```json и ```\n", + " cleaned = re.sub(r'^```json\\n|\\n```$', '', json_str.strip())\n", + " return json.loads(cleaned)[\"PANDA\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 67, + "id": "77ee59ff-4cb0-46bb-a3ce-8ad01590ac47", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "Generating content 
with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n" + ] + } + ], + "source": [ + "import importlib\n", + "import config\n", + "importlib.reload(config) \n", + "\n", + "n = 10\n", + "\n", + "code = {\n", + " 'correct': '',\n", + " 're_correct': '',\n", + " 're_uncorrect': '' # исправлена опечатка\n", + "}\n", + "\n", + "for i in range(n):\n", + " query_text = train.utterance.iloc[i]\n", + " table_path = 'data/' + train.context.iloc[i]\n", + " table_text = pd.read_csv(table_path).to_string(index=False)\n", + " full_message = config.system_prompt.replace(\"{table}\", table_text).replace(\"{query}\", query_text)\n", + "\n", + " success, response = send_message(message=full_message)\n", + " \n", + " if not success:\n", + " continue # или обработка ошибки\n", + "\n", + " label = train.targetValue.iloc[i] # вынести сюда, чтобы была доступна везде\n", + "\n", + " try:\n", + " df = pd.read_csv(table_path)\n", + " result = eval(exec_pandas(response))\n", + " if result == label:\n", + " code['correct'] += \" \" + exec_pandas(response)\n", + " else:\n", + " logic_message = config.logic_prompt.replace(\"{table}\", table_text).replace(\"{query}\", 
query_text).replace(\"{label}\", str(label)).replace(\"{pandas}\", exec_pandas(response))\n", + " success2, response2 = send_message(message=logic_message)\n", + " if success2:\n", + " try:\n", + " if eval(exec_pandas(response2)) == label:\n", + " code[\"re_correct\"] += \" \" + exec_pandas(response2)\n", + " except:\n", + " code[\"re_uncorrect\"] += \" \" + exec_pandas(response2)\n", + " except Exception as e:\n", + " # label уже определена\n", + " correct_message = config.correct_prompt.replace(\"{table}\", table_text).replace(\"{query}\", query_text).replace(\"{label}\", str(label)).replace(\"{pandas}\", exec_pandas(response))\n", + " success2, response2 = send_message(message=correct_message)\n", + " if success2:\n", + " try:\n", + " if eval(exec_pandas(response2)) == label:\n", + " code[\"re_correct\"] += \" \" + exec_pandas(response2)\n", + " except:\n", + " code[\"re_uncorrect\"] += \" \" + exec_pandas(response2)" + ] + }, + { + "cell_type": "code", + "execution_count": 71, + "id": "97549381-ebfe-4cee-846b-bdd7bcb8feae", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "[0] Execution error: operation 'rand_' not supported for dtype 'str' with object of type \n", + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "[3] Execution error: unsupported operand type(s) for -: 'str' and 'str'\n", + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", 
+ "[5] Execution error: operation 'sub' not supported for dtype 'str' with dtype 'datetime64[us]'\n", + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "[6] Execution error: Expecting ',' delimiter: line 2 column 27 (char 28)\n", + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "[8] Execution error: 'Full house'\n", + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "Generating content with model: Qwen2.5-Coder-7B-Instruct (Temp: 0.0)\n", + "Correct: 2, Logic: 0, Syntax: 0, Failed: 8\n", + "Total processed: 10\n" + ] + } + ], + "source": [ + "correct_count = 0\n", + "logic_count = 0\n", + "syntax_count = 0\n", + "failed_count = 0\n", + "\n", + "n = 10\n", + "\n", + "code = {\n", + " 'correct': '',\n", + " 're_correct': '',\n", + " 're_uncorrect': ''\n", + "}\n", + "\n", + "for i in range(n):\n", + " query_text = train.utterance.iloc[i]\n", + " table_path = 'data/' + train.context.iloc[i]\n", + " table_text = pd.read_csv(table_path).to_string(index=False)\n", + " full_message = config.system_prompt.replace(\"{table}\", table_text).replace(\"{query}\", query_text)\n", + "\n", + " success, response = send_message(message=full_message)\n", + " \n", + " if not success:\n", + " failed_count += 1\n", + " print(f\"[{i}] send_message failed\")\n", + " continue\n", + "\n", + " label = train.targetValue.iloc[i]\n", + " pandas_code = exec_pandas(response)\n", + " \n", + " if not pandas_code:\n", + " failed_count += 1\n", + " print(f\"[{i}] No 
pandas code extracted\")\n", + " continue\n", + "\n", + " # Первая попытка: выполнить сгенерированный код\n", + " try:\n", + " df = pd.read_csv(table_path)\n", + " result = eval(pandas_code)\n", + " \n", + " if result == label:\n", + " code['correct'] += \" \" + pandas_code\n", + " correct_count += 1\n", + " else:\n", + " # Логическая коррекция\n", + " logic_message = config.logic_prompt.replace(\"{table}\", table_text).replace(\"{query}\", query_text).replace(\"{label}\", str(label)).replace(\"{pandas}\", pandas_code)\n", + " success2, response2 = send_message(message=logic_message)\n", + " \n", + " if success2:\n", + " pandas_code2 = exec_pandas(response2)\n", + " try:\n", + " if eval(pandas_code2) == label:\n", + " code[\"re_correct\"] += \" \" + pandas_code2\n", + " logic_count += 1\n", + " else:\n", + " code[\"re_uncorrect\"] += \" \" + pandas_code2\n", + " failed_count += 1\n", + " except:\n", + " code[\"re_uncorrect\"] += \" \" + pandas_code2\n", + " failed_count += 1\n", + " else:\n", + " failed_count += 1\n", + " \n", + " except Exception as e:\n", + " # Синтаксическая коррекция (код не выполнился из-за ошибки)\n", + " print(f\"[{i}] Execution error: {e}\")\n", + " \n", + " syntax_message = config.correct_prompt.replace(\"{table}\", table_text).replace(\"{query}\", query_text).replace(\"{label}\", str(label)).replace(\"{pandas}\", pandas_code)\n", + " success2, response2 = send_message(message=syntax_message)\n", + " \n", + " if success2:\n", + " pandas_code2 = exec_pandas(response2)\n", + " try:\n", + " if eval(pandas_code2) == label:\n", + " code[\"re_correct\"] += \" \" + pandas_code2\n", + " syntax_count += 1\n", + " else:\n", + " code[\"re_uncorrect\"] += \" \" + pandas_code2\n", + " failed_count += 1\n", + " except:\n", + " code[\"re_uncorrect\"] += \" \" + pandas_code2\n", + " failed_count += 1\n", + " else:\n", + " failed_count += 1\n", + "\n", + "print(f\"Correct: {correct_count}, Logic: {logic_count}, Syntax: {syntax_count}, Failed: 
{failed_count}\")\n", + "print(f\"Total processed: {correct_count + logic_count + syntax_count + failed_count}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 73, + "id": "5565eca2-57ef-4543-bd32-af37c3e7ac68", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'correct': \" df.loc[df['Position'] == '1st', 'Venue'].iloc[-1] df['Opponent'].iloc[0]\",\n", + " 're_correct': '',\n", + " 're_uncorrect': \" df.loc[(df['Division'] == 2) & (df['League'] == 'USL A-League'), 'Year'].max() df.loc[df['Team'] == 'Wolfe Tones', 'Years won'].iloc[0] - 1 (df.loc[df['City'] == 'United States, Los Angeles', 'Passengers'].iloc[0] - df.loc[df['City'] == 'Canada, Saskatoon', 'Passengers'].iloc[0]) len(df[(df['Left office'].dt.to_period('D') - df['Took office'].dt.to_period('D')).astype(int') >= 1095]) df['Away team'].iloc[0] df.loc[df['Name in English'] == 'Lake Palas Tuzla', 'Depth'].values[0] df.loc['Full house', '4 credits'] df[(df['Position'] == df.loc[3, 'Position']) & (df['Player'] != 'Siim Ennemuist')]['Player'].tolist()\"}" + ] + }, + "execution_count": 73, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "code" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "id": "63549786-67fd-4212-91eb-eca42135a8a0", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "df.loc[df['Division'] == 2 & df['League'] == 'USL A-League', 'Year'].max()\n", + "df.loc[df['Position'] == '1st', 'Venue'].iloc[-1]\n", + "df.loc[df['Team'] == 'Crettyard', 'Years won'].iloc[0] - 1\n", + "(df.loc[df['City'] == 'United States, Los Angeles', 'Passengers'].values[0] - df.loc[df['City'] == 'Canada, Saskatoon', 'Passengers'].values[0])\n", + "df['Opponent'].iloc[0]\n", + "len(df[df['Left office'] - pd.to_datetime(df['Took office']) >= pd.Timedelta(days=1095)])\n", + "df['Away team'].iloc[0]\n", + "df.loc[df['Name in English'].isin(['Lake Tuz', 'Lake Palas Tuzla']), 'Depth'].max()\n", + 
"df.loc['Full house', '4 credits']\n", + "df[df['Position'] == df.loc[3, 'Position']]['Player'].tolist()\n" + ] + } + ], + "source": [ + "for i in range(len(code)):\n", + " current code = exec_pandas(code[i])\n", + " df = pd.read_csv('data/' + train.context.iloc[i])\n", + " target = train.targetValue.iloc[i]" + ] + }, + { + "cell_type": "code", + "execution_count": 55, + "id": "1219cf2d-55b4-4cf8-9ee0-93702e2d1ce9", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{0: '```json\\n{\\n \"PANDA\": \"df.loc[df[\\'Division\\'] == 2 & df[\\'League\\'] == \\'USL A-League\\', \\'Year\\'].max()\"\\n}\\n```',\n", + " 1: '```json\\n{\\n \"PANDA\": \"df.loc[df[\\'Position\\'] == \\'1st\\', \\'Venue\\'].iloc[-1]\"\\n}\\n```',\n", + " 2: '```json\\n{\\n \"PANDA\": \"df.loc[df[\\'Team\\'] == \\'Crettyard\\', \\'Years won\\'].iloc[0] - 1\"\\n}\\n```',\n", + " 3: '```json\\n{\\n \"PANDA\": \"(df.loc[df[\\'City\\'] == \\'United States, Los Angeles\\', \\'Passengers\\'].values[0] - df.loc[df[\\'City\\'] == \\'Canada, Saskatoon\\', \\'Passengers\\'].values[0])\"\\n}\\n```',\n", + " 4: '```json\\n{\\n \"PANDA\": \"df[\\'Opponent\\'].iloc[0]\"\\n}\\n```',\n", + " 5: '```json\\n{\\n \"PANDA\": \"len(df[df[\\'Left office\\'] - pd.to_datetime(df[\\'Took office\\']) >= pd.Timedelta(days=1095)])\"\\n}\\n```',\n", + " 6: '```json\\n{\\n \"PANDA\": \"df[\\'Away team\\'].iloc[0]\"\\n}\\n```',\n", + " 7: '```json\\n{\\n \"PANDA\": \"df.loc[df[\\'Name in English\\'].isin([\\'Lake Tuz\\', \\'Lake Palas Tuzla\\']), \\'Depth\\'].max()\"\\n}\\n```',\n", + " 8: '```json\\n{\\n \"PANDA\": \"df.loc[\\'Full house\\', \\'4 credits\\']\"\\n}\\n```',\n", + " 9: '```json\\n{\\n \"PANDA\": \"df[df[\\'Position\\'] == df.loc[3, \\'Position\\']][\\'Player\\'].tolist()\"\\n}\\n```'}" + ] + }, + "execution_count": 55, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "code" + ] + }, + { + "cell_type": "code", + "execution_count": 156, + "id": 
"8c446433-f113-4c4f-a791-adac6e94a19a", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "```json\n", + "{\"PANDA\": \"df[df['League'] == 'USL A-League'].index[-1].year\"}\n", + "```\n", + " Year Division League Regular Season Playoffs Open Cup Avg. Attendance\n", + " 2001 2 USL A-League 4th, Western Quarterfinals Did not qualify 7,169\n", + " 2002 2 USL A-League 2nd, Pacific 1st Round Did not qualify 6,260\n", + " 2003 2 USL A-League 3rd, Pacific Did not qualify Did not qualify 5,871\n", + " 2004 2 USL A-League 1st, Western Quarterfinals 4th Round 5,628\n", + " 2005 2 USL First Division 5th Quarterfinals 4th Round 6,028\n", + " 2006 2 USL First Division 11th Did not qualify 3rd Round 5,575\n", + " 2007 2 USL First Division 2nd Semifinals 2nd Round 6,851\n", + " 2008 2 USL First Division 11th Did not qualify 1st Round 8,567\n", + " 2009 2 USL First Division 1st Semifinals 3rd Round 9,734\n", + " 2010 2 USSF D-2 Pro League 3rd, USL (3rd) Quarterfinals 3rd Round 10,727\n", + "what was the last year where this team was a part of the usl a-league?\n", + "```json\n", + "df[df['Position'] == '1st']['Venue'].iloc[-1]\n", + "```\n", + " Year Competition Venue Position Event Notes\n", + " 2001 World Youth Championships Debrecen, Hungary 2nd 400 m 47.12\n", + " 2001 World Youth Championships Debrecen, Hungary 1st Medley relay 1:50.46\n", + " 2001 European Junior Championships Grosseto, Italy 1st 4x400 m relay 3:06.12\n", + " 2003 European Junior Championships Tampere, Finland 3rd 400 m 46.69\n", + " 2003 European Junior Championships Tampere, Finland 2nd 4x400 m relay 3:08.62\n", + " 2005 European U23 Championships Erfurt, Germany 11th (sf) 400 m 46.62\n", + " 2005 European U23 Championships Erfurt, Germany 1st 4x400 m relay 3:04.41\n", + " 2005 Universiade Izmir, Turkey 7th 400 m 46.89\n", + " 2005 Universiade Izmir, Turkey 1st 4x400 m relay 3:02.57\n", + " 2006 World Indoor Championships Moscow, Russia 2nd (h) 4x400 m relay 
3:06.10\n", + " 2006 European Championships Gothenburg, Sweden 3rd 4x400 m relay 3:01.73\n", + " 2007 European Indoor Championships Birmingham, United Kingdom 3rd 4x400 m relay 3:08.14\n", + " 2007 Universiade Bangkok, Thailand 7th 400 m 46.85\n", + " 2007 Universiade Bangkok, Thailand 1st 4x400 m relay 3:02.05\n", + " 2008 World Indoor Championships Valencia, Spain 4th 4x400 m relay 3:08.76\n", + " 2008 Olympic Games Beijing, China 7th 4x400 m relay 3:00.32\n", + " 2009 Universiade Belgrade, Serbia 2nd 4x400 m relay 3:05.69\n", + "in what city did piotr's last 1st place finish occur?\n", + "```json\n", + "df[df['County'] == 'Laois']['Team'].iloc[0]\n", + "```\n", + " Team County Wins Years won\n", + " Greystones Wicklow 1 2011\n", + "Ballymore Eustace Kildare 1 2010\n", + " Maynooth Kildare 1 2009\n", + " Ballyroan Abbey Laois 1 2008\n", + " Fingal Ravens Dublin 1 2007\n", + " Confey Kildare 1 2006\n", + " Crettyard Laois 1 2005\n", + " Wolfe Tones Meath 1 2004\n", + " Dundalk Gaels Louth 1 2003\n", + "which team won previous to crettyard?\n", + "```json\n", + "df[(df['City'] == 'Los Angeles') & (df['Ranking'] > 3)]['Passengers'].sum() - df[df['City'] == 'Saskatoon']['Passengers'].sum()\n", + "```\n", + " Rank City Passengers Ranking Airline\n", + " 1 United States, Los Angeles 14,749 NaN Alaska Airlines\n", + " 2 United States, Houston 5,465 NaN United Express\n", + " 3 Canada, Calgary 3,761 NaN Air Transat, WestJet\n", + " 4 Canada, Saskatoon 2,282 4.0 NaN\n", + " 5 Canada, Vancouver 2,103 NaN Air Transat\n", + " 6 United States, Phoenix 1,829 1.0 US Airways\n", + " 7 Canada, Toronto 1,202 1.0 Air Transat, CanJet\n", + " 8 Canada, Edmonton 110 NaN NaN\n", + " 9 United States, Oakland 107 NaN NaN\n", + "how many more passengers flew to los angeles than to saskatoon from manzanillo airport in 2013?\n", + "```pandas\n", + "df[df['Date'] == '15 August 1987']['Opponent']\n", + "```\n", + " Date Opponent Venue Result Attendance Scorers\n", + " 15 August 1987 Derby 
County Away 0–1 17,204 —\n", + " 18 August 1987 Coventry City Home 0–1 09,380 —\n", + " 22 August 1987 West Ham United Home 2–2 08,073 Harford (2)\n", + " 29 August 1987 Chelsea Away 0–3 16,075 —\n", + " 31 August 1987 Arsenal Home 1–1 08,745 Wilson (pen)\n", + " 5 September 1987 Oxford United Away 5–2 06,804 Breacker, Harford, Hill, Nwajiobi, B. Stein\n", + "12 September 1987 Everton Home 2–1 08,124 Hill, B. Stein\n", + "19 September 1987 Charlton Athletic Away 0–1 05,002 —\n", + "26 September 1987 Queens Park Rangers Away 0–2 11,175 —\n", + " 3 October 1987 Manchester United Home 1–1 09,137 Harford\n", + " 10 October 1987 Portsmouth Away 1–3 12,391 Harford (pen)\n", + " 17 October 1987 Wimbledon Home 2–0 07,018 B. Stein, Wilson\n", + " 24 October 1987 Liverpool Home 0–1 11,997 —\n", + " 7 November 1987 Newcastle United Home 4–0 07,638 Nwajiobi, B. Stein, M. Stein (2)\n", + " 14 November 1987 Sheffield Wednesday Away 2–0 16,960 Allinson, M. Stein\n", + " 21 November 1987 Tottenham Hotspur Home 2–0 10,091 Allinson (2)\n", + " 5 December 1987 Norwich City Home 1–2 07,002 B. Stein\n", + " 12 December 1987 Watford Away 1–0 12,152 Foster\n", + " 18 December 1987 Southampton Home 2–2 06,618 Harford, McDonough\n", + " 26 December 1987 Everton Away 0–2 32,128 —\n", + " 28 December 1987 Charlton Athletic Home 1–0 07,243 Wilson\n", + " 1 January 1988 Chelsea Home 3–0 08,018 Harford, B. Stein, M. Stein\n", + " 2 January 1988 West Ham United Away 1–1 16,716 M. 
Stein\n", + " 16 January 1988 Derby County Home 1–0 07,175 McDonough\n", + " 6 February 1988 Oxford United Home 7–4 08,063 Harford (2), McDonough, B.Stein, M.Stein (3)\n", + " 13 February 1988 Arsenal Away 1–2 22,612 M.Stein\n", + " 5 March 1988 Wimbledon Away 0–2 04,854 —\n", + " 15 March 1988 Coventry City Away 0–4 13,711 —\n", + " 29 March 1988 Portsmouth Home 4–1 06,740 B.Stein, M.Stein, Wilson, own goal\n", + " 2 April 1988 Newcastle United Away 0–4 20,752 —\n", + " 5 April 1988 Sheffield Wednesday Home 2–2 07,337 McDonough, B. Stein\n", + " 12 April 1988 Manchester United Away 0–3 28,830 —\n", + " 19 April 1988 Queens Park Rangers Home 2–1 06,735 Foster, Wilson (pen)\n", + " 30 April 1988 Norwich City Away 2–2 13,171 M. Stein, Wilson (pen)\n", + " 2 May 1988 Watford Home 2–1 10,409 Oldfield, Wilson (pen)\n", + " 4 May 1988 Tottenham Hotspur Away 1–2 15,437 Grimes\n", + " 7 May 1988 Southampton Away 1–1 12,722 Wilson\n", + " 9 May 1988 Liverpool Away 1–1 30,374 Oldfield\n", + " 13 May 1988 Nottingham Forest Home 1–1 09,108 Donaghy\n", + " 15 May 1988 Nottingham Forest Away 1–1 13,106 Oldfield\n", + "who was the opponent in the first game of the season?\n", + "```json\n", + "{\"PANDA\": \"len(df[df['Took office'] <= df['Left office'].shift()])\"}\n", + "```\n", + " Unnamed: 0 Name Took office Left office Party Notes/Events\n", + " 11 William McCreery March 4, 1803 March 3, 1809 Democratic Republican NaN\n", + " 12 Alexander McKim March 4, 1809 March 3, 1815 Democratic Republican NaN\n", + " 13 William Pinkney March 4, 1815 April 18, 1816 Democratic Republican Resigned to accept position as Minister Plenipotentiary to Russia\n", + " 14 Peter Little September 2, 1816 March 3, 1823 Democratic Republican NaN\n", + " 14 Peter Little March 4, 1823 March 3, 1825 Jacksonian DR NaN\n", + " 14 Peter Little March 4, 1825 March 3, 1829 Adams NaN\n", + " 15 Benjamin C. 
Howard March 4, 1829 March 3, 1833 Jacksonian NaN\n", + "how many people stayed at least 3 years in office?\n", + "```python\n", + "df['Away team'].iloc[0]\n", + "```\n", + " Tie no Home team Score Away team\n", + " 49 Dalsjöfors GoIF (WC) 1-4 Varbergs GIF (D3)\n", + " 50 Sjömarkens IF (D4) 1-4 BK Slätta Damm (D3)\n", + " 51 IF Tymer (D4) 0-3 Kållereds SK (D3)\n", + " 52 IFK Hjo (WC) 0-4 Nässjö FF (D3)\n", + " 53 Falköpings FK (D4) 2-0 Gånghesters SK (D4)\n", + " 54 Ankarsrums IS (WC) 1-2 Linköpings FF (D3)\n", + " 55 Rödsle BK (D4) 1-0 (gg) Skeninge IK (D4)\n", + " 56 Lindås BK (D4) 1-3 Hultsfreds FK (D3)\n", + " 57 Hvetlanda GIF (D4) 0-1 Åhus Horna BK (D3)\n", + " 58 Bredaryds IK (D4) 3-0 Ulricehamns IFK (D3)\n", + " 59 Hovslätts IK (D4) 0-9 Tidaholms GIF (D2)\n", + " 60 Torpa AIS (D4) 0-2 BK Zeros (D3)\n", + " 61 Fiskeby IF (WC) 2-1 (gg) Västerviks FF (D4)\n", + " 62 Gnösjö IF (D4) 1-3 Skövde AIK (D2)\n", + " 63 Sävsjö FF (D4) 5-3 Skillingaryds IS (D4)\n", + " 64 Boxholms IF (WC) 1-2 Tranås AIF (D3)\n", + " 65 LSW IF (D4) 2-1 Husqvarna FF (D2)\n", + " 66 Lessebo GoIF (D4) 0-1 Listerby IK (D4)\n", + " 67 Rörviks IF (D4) 0-2 Lunds BK (D2)\n", + " 68 Lagans AIK (D4) 0-1 Högaborgs BK (D2)\n", + " 69 IF Eksjö (D4) 1-4 Kalmar FF (D2)\n", + " 70 Limmareds IF (D4) 1-5 Växjö Norra IF (D2)\n", + " 71 Bankeryds SK (D4) 4-1 Hjulsbro IK (D2)\n", + " 72 Skultorps IF (D4) 0-2 BK Forward (D2)\n", + " 73 Gullspångs IF (D4) 0-7 Rynninge IK (D3)\n", + " 74 Skara FC (D4) 0-4 Karlslunds IF (D3)\n", + " 75 Bråtens IK (D4) 0-4 Vivalla-Lundby IF (D3)\n", + " 76 Finnerödja IF(D4) 3-1 IFK Mariestad (D4)\n", + " 77 Sköllersta IF (D4) 1-3 Hemgårdarnas BK (D4)\n", + " 78 Simonstorps IF (D4) 0-5 Nyköpings BIS (D2)\n", + " 79 Ringarums IF (D4) 1-4 Värmbols FC (D4)\n", + " 80 Dagsbergs IF (D4) 1-0 Malmköpings IF (D4)\n", + " 81 Katrineholms SK (D4) 0-2 BK Kenty (D4)\n", + " 82 Härad IF (D4) 2-3 (gg) IFK Västerås (D2)\n", + " 83 Kolsva IF (D4) 0-3 Karlstad BK (D2)\n", + " 84 Laxå IF (D4) 0-4 IF 
Sylvia (D2)\n", + " 85 Ransta IK (D4) 1-3 IFK Hallsberg (D4)\n", + " 86 Skyllbergs IK (WC) 0-4 IFK Kristinehamn (D4)\n", + " 87 Filipstads FF (D4) 3-1 Kungsörs SK (D4)\n", + " 88 Hallstahammars SK (D4) 0-7 IFK Eskilstuna (D2)\n", + " 89 BK Hird (D4) 0-5 Hargs BK (D2)\n", + " 90 Vretstorps IF (WC) 1-5 IFK Ölme (D3)\n", + " 91 Frövi IK (WC) 1-9 Skiljebo SK (D3)\n", + " 92 IF Rune (WC) 1-3 Gnesta FF (D3)\n", + " 93 Västerås BK 30 (WC) 0-8 Örebro SK Ungdom (D4)\n", + " 94 VoIF Diana (WC) 4-0 Enskede IK (D4)\n", + " 95 New Mill FF (WC) 4-2 Värtans SK (D3)\n", + " 96 Runtuna IK/Löthen (WC) 1-2 Huddinge IF (D3)\n", + "who is the first away team on the chart\n", + "```pandas\n", + "df[df['Name in Turkish'].str.contains('Tuz')]['Depth']\n", + "```\n", + " Name in English Name in Turkish Area (km2) Depth Location (districts and/or provinces)\n", + " Lake Van Van Gölü 3755 km2 451 m Van, Bitlis\n", + " Lake Tuz Tuz Gölü 1500 km2 2 m Aksaray, Ankara, Konya\n", + " Lake Beyşehir Beyşehir Gölü 656 km2 10 m Beyşehir in Konya, Isparta\n", + " Lake Eğirdir Eğirdir Gölü 482 km2 NaN Isparta\n", + " Lake İznik İznik Gölü 308 km2 NaN İznik in Bursa, Yalova\n", + " Lake Burdur Burdur Gölü 200 km2 NaN Burdur, Isparta\n", + " Lake Manyas Manyas Gölü 166 km2 NaN Balıkesir\n", + " Lake Acıgöl Acıgöl 153 km2 NaN Denizli, Afyonkarahisar\n", + " Lake Uluabat Uluabat Gölü 134 km2 1–2 m Bursa\n", + " Lake Çıldır Çıldır Gölü 115 km2 NaN Ardahan, Kars\n", + " Lake Palas Tuzla Palas Tuzla Gölü 106 km2 15 m Palas/Kayseri\n", + " Lake Akşehir Akşehir Gölü 105 km2 NaN Akşehir in Konya, Afyonkarahisar\n", + " Lake Eber Eber Gölü 104 km2 NaN Afyonkarahisar\n", + " Lake Erçek Erçek Gölü 98 km2 NaN Van\n", + " Lake Hazar Hazar Gölü 86 km2 NaN Elazığ\n", + " Lake Bafa Bafa Gölü 60 km2 NaN Aydın, Muğla\n", + " Lake Köyceğiz Köyceğiz Gölü 52 km2 NaN Köyceğiz in Muğla\n", + " Lake Işıklı Işıklı Gölü 49 km2 NaN Denizli\n", + " Lake Nazik Nazik Gölü 48 km2 NaN Bitlis\n", + " Lake Sapanca Sapanca Gölü 47 km2 NaN 
Sakarya Province\n", + " Lake Salda Salda Gölü 45 km2 184 m Burdur\n", + " Lake Yay Yay Gölü 37 km2 NaN Kayseri\n", + " Lake Akyatan Akyatan Gölü 35 km2 NaN Adana\n", + " Lake Balık Balık Gölü 34 km2 NaN Doğubeyazıt in Ağrı\n", + " Lake Marmara Marmara Gölü 34 km2 NaN Salihli, Gölmarmara in Manisa\n", + " Lake Çöl Çöl Gölü 32 km2 NaN Ankara\n", + "Lake Durusu (Lake Terkos) Durusu Gölü 25 km2 NaN İstanbul\n", + " Lake Karine Karine Gölü 24 km2 NaN NaN\n", + " Lake Tuzla Tuzla Gölü 23 km2 NaN Tuzla\n", + " Lake Küçükçekmece Küçükçekmece Gölü 16 km2 NaN Küçükçekmece, İstanbul\n", + " Lake Yaraşlı Yaraşlı Gölü 16 km2 NaN Burdur\n", + " Lake Haçlı Haçlı Gölü 16 km2 NaN Muş\n", + " Lake Seyfe Seyfe Gölü 15 km2 NaN Kırşehir\n", + " Lake Akyayan Akyayan Gölü 15 km2 NaN NaN\n", + " Lake Hozapin Hozapin Gölü 14 km2 NaN NaN\n", + " Lake Arin Arin Gölü 13 km2 NaN NaN\n", + " Lake Nemrut Nemrut Gölü 12 km2 NaN Bitlis Province\n", + " Lake Balık Balık Gölü 12 km2 NaN NaN\n", + " Lake Büyükçekmece Büyükçekmece Gölü 11 km2 NaN Büyükçekmece, Istanbul\n", + " Lake Boluk Boluk Gölü 11 km2 NaN NaN\n", + " Lake Akdoğan Akdoğan Gölü 11 km2 NaN NaN\n", + " Lake Çavuşlu Çavuşlu Gölü 9 km2 NaN NaN\n", + " Lake Düden Düden Gölü 8 km2 NaN NaN\n", + " Lake Gala Gala Gölü 8 km2 NaN Edirne\n", + " Lake Karataş Karataş Gölü 6 km2 NaN NaN\n", + " Lake Mogan Mogan Gölü 6 km2 NaN Ankara\n", + " Paradeniz Paradeniz 4 km2 NaN Mersin\n", + " Lake Eymir Eymir Gölü 1.8 km2 NaN Ankara\n", + " Lake Abant Abant Gölü 1.28 km2 18 m Bolu\n", + " Lake Gölcük Gölcük Gölü 1 km2 NaN İzmir\n", + "which is deeper, lake tuz or lake palas tuzla?\n", + "```json\n", + "df.query('Hand == \"Full house\" & 2 credits == 4 & theoretical_return > 98.68%')['Hand']\n", + "```\n", + " Hand 1 credit 2 credits 3 credits 4 credits 5 credits\n", + " Royal flush 250 500 750 1000 4000*\n", + " Straight flush 60 120 180 240 400\n", + " Four aces 400 800 1200 1600 2000\n", + "Four of a kind, 2-4 100 200 300 400 500\n", + "Four of a 
kind, 5-K 50 100 150 200 250\n", + " Full house 8 16 24 32 40\n", + " Flush 5 10 15 20 25\n", + " Straight 4 8 12 16 20\n", + " Three of a kind 3 6 9 12 15\n", + " Two pair 1 2 3 4 5\n", + " Jacks or better 1 2 3 4 5\n", + " Theoretical return 98.68% 98.68% 98.68% 98.68% 99.92%*\n", + "after winning on four credits with a full house, what is your payout?\n", + "```pandas\n", + "df[df['Position'] == 'Middle blocker']\n", + "```\n", + " No. Player Birth Date Weight Height Position Current Club\n", + " 4 Ardo Kreek August 7, 1986 (age 27) 96 203 Middle blocker Paris Volley\n", + " 5 Kert Toobal June 3, 1979 (age 35) 78 189 Setter Sivas 4 Eylül\n", + " 6 Martti Juhkami June 6, 1988 (age 26) 96 196 Spiker TV Bühl\n", + " 7 Argo Meresaar January 13, 1980 (age 34) 107 206 Opposite Bigbank Tartu\n", + " 8 Kusti Nõlvak November 6, 1991 (age 22) 81 186 Setter TTÜ VK\n", + " 9 Robert Täht August 15, 1993 (age 20) 80 190 Spiker Bigbank Tartu\n", + " 11 Oliver Venno May 23, 1990 (age 24) 105 210 Opposite Rennes Volley 35\n", + " 14 Rait Rikberg August 30, 1982 (age 31) 80 174 Libero Bigbank Tartu\n", + " 16 Edgar Järvekülg June 12, 1988 (age 26) 77 186 Libero Pärnu VK\n", + " 17 Siim Ennemuist December 5, 1989 (age 24) 89 196 Middle blocker TTÜ VK\n", + " 18 Jaanus Nõmmsalu January 19, 1981 (age 33) 94 200 Spiker TTÜ VK\n", + " 19 Andri Aganits September 7, 1993 (age 20) 99 207 Middle Blocker TV Bühl\n", + "which players played the same position as ardo kreek?\n" + ] + } + ], + "source": [ + "for i in range(n):\n", + " print(code[i])\n", + " print(pd.read_csv('data/' + train.context.iloc[i]).to_string(index=False))\n", + " print(train.utterance.iloc[i])" + ] + }, + { + "cell_type": "code", + "execution_count": 87, + "id": "2bb82209-c50e-4a31-a10a-77be411f4b4d", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "df[df['Position'] == '1st']['Venue'].iloc[-1]\n", + "\n", + "\n", + "df['Opponent'][0]\n", + "\n", + "\n", + 
"\n", + "\n" + ] + } + ], + "source": [ + "for i in range(n):\n", + " # Очищаем код для текущей итерации\n", + " current_code = parse_panda_code(code[i])\n", + " \n", + " #print(f\"Итерация {i} | Запускаем выражение: {current_code}\")\n", + " \n", + " try:\n", + " # Читаем нужный датафрейм\n", + " df = pd.read_csv('data/' + train.context.iloc[i])\n", + " target = train.iloc[i].targetValue\n", + " \n", + " # Передаем в eval() код ИМЕННО для текущей итерации\n", + " result = eval(current_code)\n", + " \n", + " # print(\"--- Результат ---\")\n", + " # print(result)\n", + " # print(\"--- Правильный ответ ---\")\n", + " # print(target)\n", + " # print(\"-\" * 40)\n", + " if result == target:\n", + " print(current_code)\n", + " \n", + " except Exception as e:\n", + " print()\n", + " # Выводим реальный текст ошибки, чтобы понять, в чем проблема\n", + " #print(f\"Ошибка на итерации {i}: {e}\")\n", + " #print(\"-\" * 40)" + ] + }, + { + "cell_type": "code", + "execution_count": 61, + "id": "34a693df-cd37-4075-869b-1e1b837da533", + "metadata": {}, + "outputs": [], + "source": [ + "df = pd.read_csv('data/'+ train.context.iloc[0])" + ] + }, + { + "cell_type": "code", + "execution_count": 75, + "id": "4d229f11-3a1f-4b98-a860-b2dc58c623e7", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "WARNING: Ignoring invalid distribution ~atplotlib (C:\\Users\\PC\\anaconda3\\Lib\\site-packages)\n" + ] + } + ], + "source": [ + "! 
pip freeze > requirements.txt" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "c752e932-710b-4e0c-ac77-d0ec621c0e9c", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.3" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/WTQ/Qwen/config.py b/WTQ/Qwen/config.py new file mode 100644 index 0000000..f4a43ba --- /dev/null +++ b/WTQ/Qwen/config.py @@ -0,0 +1,57 @@ +system_prompt = '''You are a Python expert specializing in pandas. You are given a question and a table. Your task is to translate the given natural language question into +a single-line pandas expression. This expression, which acts like a query, must +be valid and executable so that running the pandas expression will output the +answer to the question. Consider the following: +1. The table is represented as a pandas DataFrame named df. +2. Do not include explanations, comments, or multiline outputs. +3. Ensure the output is concise, correct, and when run, it outputs the correct +given answer, and strictly follows the Json format: {"PANDA": ""} + +### Table schema +{table} + +### Query +{query} + +''' + +logic_prompt = '''You are an expert in Python with a specialization in pandas. Your task is to verify and correct a given pandas code that translates a natural language statement into a pandas expression. The corrected pandas code must accurately evaluate the truth of the statement when applied to the given table. Requirements: + +The table is represented as a pandas DataFrame named df. + +The pandas code must evaluate to a value using the snippet: (eval(pandas_code)).
The result can be boolean, number, string, date, or any type that matches the expected answer. + +The corrected pandas code must match the value indicated by the provided "Label". + +Ensure the output is concise, correct, and when run outputs the answer, and strictly in the following JSON format with a single key "PANDA": "CORRECT PANDA": "" + +### Table schema +{table} + +### Pandas code +{pandas} + +### Label +{label} +''' + +correct_prompt = '''You are an expert in Python, specializing in pandas. Your task is to correct a pandas code that translates a given natural language statement into a pandas expression. The code, along with the specific error it contains, is provided. Your corrected pandas_code must be valid and executable when running the code snippet eval(pandas_code), ensuring it accurately evaluates the statement using the provided table with no errors. + +The pandas_code can return any type (boolean, number, string, date, etc.) that matches the expected answer. Consider the following: + +The table is represented as a pandas DataFrame named df. + +Do not include explanations, comments, or multiline outputs. 
+ +Ensure the output is concise, correct, and when run outputs the answer, and strictly in the following JSON format with a single key "PANDA": "" + +### Table schema +{table} + +### Pandas code +{pandas} + +### Label +{label} +''' + diff --git a/WTQ/Qwen/server.py b/WTQ/Qwen/server.py new file mode 100644 index 0000000..03aa306 --- /dev/null +++ b/WTQ/Qwen/server.py @@ -0,0 +1,75 @@ +import uvicorn +from fastapi import FastAPI, HTTPException +from pydantic import BaseModel +import torch +from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig + +# Заменили путь на локальный Qwen 7B +model_name = "D:/models/Qwen2.5-Coder-7B-Instruct" +print("Загрузка модели Qwen 7B в 4-бит на GPU...") + +# Инициализируем модель с 4-битным квантованием под RTX 4060 +tokenizer = AutoTokenizer.from_pretrained(model_name) + +quantization_config = BitsAndBytesConfig( + load_in_4bit=True, + bnb_4bit_quant_type="nf4", + bnb_4bit_compute_dtype=torch.bfloat16, + bnb_4bit_use_double_quant=True +) + +model = AutoModelForCausalLM.from_pretrained( + model_name, + quantization_config=quantization_config, + device_map="cuda:0" # Для квантованных моделей заменяет (.to("cuda")) +) + +app = FastAPI() + +class ChatRequest(BaseModel): + model: str + messages: list + temperature: float = 0.5 + max_tokens: int = 500 + +@app.post("/v1/chat/completions") +async def chat_completions(request: ChatRequest): + try: + user_message = request.messages[-1]["content"] + + text = tokenizer.apply_chat_template([{"role": "user", "content": user_message}], tokenize=False, add_generation_prompt=True) + + # Явно отправляем входные токены на видеокарту ("cuda") + model_inputs = tokenizer([text], return_tensors="pt", truncation=True, max_length=2048).to("cuda") + + if request.temperature <= 0.0: + gen_kwargs = {"do_sample": False} + else: + gen_kwargs = {"do_sample": True, "temperature": request.temperature} + + # Отключаем подсчет градиентов для экономии памяти при генерации + with 
torch.no_grad(): + generated_ids = model.generate( + **model_inputs, + max_new_tokens=request.max_tokens, + pad_token_id=tokenizer.eos_token_id, + **gen_kwargs + ) + + generated_ids = [output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)] + response_text = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0] + + return { + "choices": [{ + "message": { + "role": "assistant", + "content": response_text + } + }] + } + except Exception as e: + print(f"\n[ВНУТРЕННЯЯ ОШИБКА СЕРВЕРА]: {e}\n") + raise HTTPException(status_code=500, detail=str(e)) + +if __name__ == "__main__": + uvicorn.run(app, host="0.0.0.0", port=9092) \ No newline at end of file From 69e9189d22caf645077e71ede4e8c4c4937f5623 Mon Sep 17 00:00:00 2001 From: AldarArmaev Date: Fri, 19 Jun 2026 11:36:37 +0800 Subject: [PATCH 09/10] =?UTF-8?q?=D0=A3=D0=B4=D0=B0=D0=BB=D0=B5=D0=BD?= =?UTF-8?q?=D0=B8=D0=B5=20=D1=81=D0=B8=D0=BC=D0=B2=D0=BE=D0=BB=D0=B0=20%?= =?UTF-8?q?=20=D0=A0=D0=B0=D1=81=D0=BF=D0=BE=D0=B7=D0=BD=D0=B0=D0=B2=D0=B0?= =?UTF-8?q?=D0=BD=D0=B8=D0=B5=20=D0=B4=D0=B0=D1=82=20=D1=82=D0=B8=D0=BF?= =?UTF-8?q?=D0=B0=2012.12.26=20=D0=A2=D0=BE=D1=87=D0=BA=D0=B0=20=D0=B1?= =?UTF-8?q?=D0=BE=D0=BB=D1=8C=D1=88=D0=B5=20=D0=BD=D0=B5=20=D1=81=D1=87?= =?UTF-8?q?=D0=B8=D1=82=D0=B0=D0=B5=D1=82=D1=81=D1=8F=20=D1=80=D0=B0=D0=B7?= =?UTF-8?q?=D0=B4=D0=B5=D0=BB=D0=B8=D1=82=D0=B5=D0=BB=D0=B5=D0=BC=20=D1=82?= =?UTF-8?q?=D1=8B=D1=81=D1=8F=D1=87=20=D0=B4=D0=BB=D1=8F=20=D1=86=D0=B5?= =?UTF-8?q?=D0=BB=D1=8B=D1=85=20=D1=87=D0=B8=D1=81=D0=B5=D0=BB=20=D0=9F?= =?UTF-8?q?=D1=80=D0=BE=D0=BF=D1=83=D1=81=D0=BA=D0=B8=20=D0=B8=20None=20?= =?UTF-8?q?=D0=BA=D0=BE=D0=BD=D0=B2=D0=B5=D1=80=D1=82=D0=B8=D1=80=D1=83?= =?UTF-8?q?=D1=8E=D1=82=D1=81=D1=8F=20=D0=B2=20=D1=82=D0=B8=D0=BF=20'None'?= =?UTF-8?q?=20=D0=A3=D0=B1=D1=80=D0=B0=D0=BD=D0=BE=20=D0=B1=D1=83=D0=BB?= =?UTF-8?q?=D0=B5=D0=B2=D0=BE=20=D1=80=D0=B0=D1=81=D0=BF=D0=BE=D0=B7=D0=BD?= 
=?UTF-8?q?=D0=B0=D0=B2=D0=B0=D0=BD=D0=B8=D0=B5=20[]=20=D0=A3=D0=B1=D1=80?= =?UTF-8?q?=D0=B0=D0=BD=20=D0=BF=D1=80=D0=BE=D1=87=D0=B5=D1=80=D0=BA=20-?= =?UTF-8?q?=20=D0=BA=D0=B0=D0=BA=20false=20=D0=94=D0=BE=D0=B1=D0=B0=D0=B2?= =?UTF-8?q?=D0=BB=D0=B5=D0=BD=D0=B0=20=D0=BE=D0=B1=D1=80=D0=B0=D0=B1=D0=BE?= =?UTF-8?q?=D1=82=D0=BA=D0=B0=20=D1=82=D0=B5=D0=BA=D1=81=D1=82=D0=BE=D0=B2?= =?UTF-8?q?=D1=8B=D1=85=20=D0=BC=D0=BD=D0=BE=D0=B6=D0=B8=D1=82=D0=B5=D0=BB?= =?UTF-8?q?=D0=B5=D0=B9=20(=C2=AB=D1=82=D1=8B=D1=81=C2=BB,=20=C2=ABk=C2=BB?= =?UTF-8?q?,=20=C2=ABmillion=C2=BB=20=D0=B8=20=D1=82.=D0=BF.)=20=D0=A0?= =?UTF-8?q?=D0=B0=D1=81=D1=88=D0=B8=D1=80=D0=B5=D0=BD=20float=5Fpattern=20?= =?UTF-8?q?=D0=B4=D0=BB=D1=8F=20=D1=80=D0=B0=D1=81=D0=BF=D0=BE=D0=B7=D0=BD?= =?UTF-8?q?=D0=B0=D0=B2=D0=B0=D0=BD=D0=B8=D1=8F=201.234=20=D0=BA=D0=B0?= =?UTF-8?q?=D0=BA=20float?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- utils/type_check.py | 111 ++++++++++++++++++++++++++++++++------------ 1 file changed, 82 insertions(+), 29 deletions(-) diff --git a/utils/type_check.py b/utils/type_check.py index c7e7450..a6e5115 100644 --- a/utils/type_check.py +++ b/utils/type_check.py @@ -3,7 +3,57 @@ from concurrent.futures import ThreadPoolExecutor from typing import Union, Any, Optional, Dict, Tuple +# ---------------------------------------------------------------------- +# Множители для текстовых суффиксов (тыс, млн, k, M и т.п.) 
+# ---------------------------------------------------------------------- +MULTIPLIERS = { + # Английские + 'k': 10**3, 'thousand': 10**3, + 'm': 10**6, 'million': 10**6, + 'b': 10**9, 'billion': 10**9, + 't': 10**12, 'trillion': 10**12, + # Русские (транслитерация и кириллица) + 'тыс': 10**3, 'тысяч': 10**3, 'тысяча': 10**3, + 'млн': 10**6, 'миллион': 10**6, + 'млрд': 10**9, 'миллиард': 10**9, + 'трлн': 10**12, 'триллион': 10**12, + 'сотня': 100, 'сотни': 100, 'сотен': 100 +} + +MULTIPLIER_PATTERN = re.compile( + r'^([+-]?\d+(?:[.,]\d+)?)\s*(' + '|'.join(MULTIPLIERS.keys()) + r')$', + re.IGNORECASE +) + +def apply_multiplier(s: str) -> str: + """Если строка заканчивается известным множителем, умножает число и возвращает строку. + Иначе возвращает исходную строку без изменений.""" + match = MULTIPLIER_PATTERN.match(s) + if not match: + return s + num_str, suffix = match.groups() + # Приводим суффикс к нижнему регистру для поиска в словаре + multiplier = MULTIPLIERS.get(suffix.lower()) + if multiplier is None: + return s + + # Преобразуем числовую часть во float (поддерживаются оба разделителя) + num_str_clean = num_str.replace(',', '.') + try: + value = float(num_str_clean) * multiplier + except ValueError: + return s + + # Возвращаем без экспоненциальной записи, целые — без десятичной точки + if value.is_integer(): + return str(int(value)) + else: + # Убираем лишние нули после запятой + return f"{value:.10f}".rstrip('0').rstrip('.') +# ---------------------------------------------------------------------- +# Оригинальные функции (с доработками) +# ---------------------------------------------------------------------- def clean_value(value: Any) -> str: """Базовая очистка значения (удаление спецсимволов)""" if value is None: @@ -12,8 +62,7 @@ def clean_value(value: Any) -> str: if not isinstance(value, str): value = str(value) - # Заменяем неразрывные пробелы и - # другие специальные символы + # Заменяем неразрывные пробелы и другие специальные символы value = 
value.replace('\xa0', ' ') value = value.replace('\ufeff', '') value = value.replace('\u200b', '') @@ -22,10 +71,10 @@ def clean_value(value: Any) -> str: def extract_number_string(s: str) -> str: - """Подготовка строки для проверки на число (удаление валют, скобок, текста)""" - # Удаляем символы валют в начале/конце - s = re.sub(r'^[$€£¥₽\s]*', '', s) - s = re.sub(r'[$€£¥₽\s]*$', '', s) + """Подготовка строки для проверки на число (удаление валют, %, скобок, текста)""" + # Удаляем символы валют и % в начале/конце + s = re.sub(r'^[$€£¥₽%\s]*', '', s) + s = re.sub(r'[$€£¥₽%\s]*$', '', s) # Обработка скобок (финансовый формат) if s.startswith('(') and s.endswith(')'): @@ -39,6 +88,7 @@ def extract_number_string(s: str) -> str: return s.strip() + def check_type_comprehensive(data: Union[pd.Series, list, Any]) -> Tuple[str, int]: """ Определяет тип данных. Все значения проверяются через регулярные выражения. @@ -49,11 +99,9 @@ def check_type_comprehensive(data: Union[pd.Series, list, Any]) -> Tuple[str, in # 2. 
Если это одиночное значение - создаем список из одного элемента if not isinstance(data, (pd.Series, list)): - # Преобразуем одиночное значение в список для единообразной обработки values = [data] is_single_value = True else: - # Если это Series или список if isinstance(data, pd.Series): values = data.tolist() else: @@ -62,18 +110,24 @@ def check_type_comprehensive(data: Union[pd.Series, list, Any]) -> Tuple[str, in # РЕГУЛЯРНЫЕ ВЫРАЖЕНИЯ - # Булевы значения - расширенный набор - bool_true_pattern = re.compile(r'^(true|yes|да|истина|1|\+)$', re.IGNORECASE) - bool_false_pattern = re.compile(r'^(false|no|нет|ложь|0|-|\[ \])$', re.IGNORECASE) + # Булевы значения – убраны [] и прочерк + bool_true_pattern = re.compile(r'^(true|yes|да|истина)$', re.IGNORECASE) + bool_false_pattern = re.compile(r'^(false|no|нет|ложь)$', re.IGNORECASE) # Базовые числовые паттерны (после очистки) exp_pattern = re.compile(r'^[-+]?(?:\d+(?:[.,]\d*)?|[.,]\d+)[eE][-+]?\d+$') # Паттерны для float с поддержкой разных разделителей - float_pattern = re.compile(r'^[+-]?(?:\d{1,3}(?:[ ,.]\d{3})*[.,]\d+|\d+[.,]\d+|[.,]\d+|\d+[.,])$') - - # Паттерн для int с разделителями тысяч (точка, запятая или пробел) - int_with_separators_pattern = re.compile(r'^[+-]?\d{1,3}(?:[ ,.]\d{3})*$') + float_pattern = re.compile( + r'^[+-]?(?:\d{1,3}(?:[ ,.]\d{3})*(?:[.,]\d+)?' 
# разделители тысяч + опциональная дробная часть + r'|\d+[.,]\d+' # простое десятичное число + r'|[.,]\d+' # начинается с разделителя + r'|\d+[.,]' # заканчивается разделителем + r')$' + ) + + # Паттерн для int с разделителями тысяч (только пробел и запятая, точка исключена) + int_with_separators_pattern = re.compile(r'^[+-]?\d{1,3}(?:[ ,]\d{3})*$') int_pattern = re.compile(r'^[+-]?\d+$') # Паттерны для дат и времени с улучшениями @@ -83,14 +137,16 @@ def check_type_comprehensive(data: Union[pd.Series, list, Any]) -> Tuple[str, in 'date_eu': re.compile(r'^\d{1,2}\.\d{1,2}\.\d{4}$'), 'date_slash': re.compile(r'^\d{1,2}/\d{1,2}/\d{4}$'), 'date_year_last': re.compile(r'^\d{1,2}-\d{1,2}-\d{2}$'), - # Новые паттерны для дат с буквенными месяцами + # Новый шаблон для 12.12.26 (DD.MM.YY) + 'date_eu_short': re.compile(r'^\d{1,2}\.\d{1,2}\.\d{2}$'), + # Паттерны для дат с буквенными месяцами 'date_month_short': re.compile(r'^\d{1,2}-[A-Za-z]{3,9}-\d{4}$'), 'date_month_short_dot': re.compile(r'^\d{1,2}\.[A-Za-z]{3,9}\.\d{4}$'), 'date_month_long': re.compile(r'^\d{1,2}\s+[A-Za-z]{3,9}\s+\d{4}$'), 'date_month_short_comma': re.compile(r'^[A-Za-z]{3,9}\s+\d{1,2},\s+\d{4}$'), } - # ИСПРАВЛЕНИЕ 3: Паттерны для времени с необязательными ведущими нулями + # Паттерны для времени time_patterns = { 'time_basic': re.compile(r'^\d{1,2}:\d{1,2}$'), 'time_seconds': re.compile(r'^\d{1,2}:\d{1,2}:\d{1,2}$'), @@ -121,7 +177,6 @@ def check_type_comprehensive(data: Union[pd.Series, list, Any]) -> Tuple[str, in type_counts['int'] = type_counts.get('int', 0) + 1 continue elif isinstance(value, float): - # Проверяем, является ли float целым числом if value.is_integer(): type_counts['int'] = type_counts.get('int', 0) + 1 else: @@ -134,9 +189,12 @@ def check_type_comprehensive(data: Union[pd.Series, list, Any]) -> Tuple[str, in type_counts['empty'] = type_counts.get('empty', 0) + 1 continue + # ----- ПРИМЕНЯЕМ ТЕКСТОВЫЕ МНОЖИТЕЛИ (тыс, млн, k, M и т.д.) 
----- + str_value = apply_multiplier(str_value) + found_type = False - # 1. Проверка на BOOL (кроме одиночных символов) + # 1. Проверка на BOOL (кроме одиночных символов, '-' не участвует) if not found_type and len(str_value) > 1: if bool_true_pattern.match(str_value) or bool_false_pattern.match(str_value): type_counts['bool'] = type_counts.get('bool', 0) + 1 @@ -154,7 +212,7 @@ def check_type_comprehensive(data: Union[pd.Series, list, Any]) -> Tuple[str, in except: continue - # 3. Проверка на ДАТУ + # 3. Проверка на ДАТУ (добавлен date_eu_short) if not found_type: for pattern in date_patterns.values(): if pattern.match(str_value): @@ -189,10 +247,9 @@ def check_type_comprehensive(data: Union[pd.Series, list, Any]) -> Tuple[str, in if re.match(r'^\d+\.0+$', cleaned_num): cleaned_num = cleaned_num.split('.')[0] - # Удаляем все пробелы для чисел с пробелами в качестве разделителей + # Удаляем все пробелы для чисел с пробелами в качестве разделителей, # но сохраняем десятичный разделитель if ' ' in cleaned_num and (',' in cleaned_num or '.' in cleaned_num): - # Для чисел с десятичными разделителями сохраняем последний разделитель if ',' in cleaned_num: parts = cleaned_num.split(',') cleaned_num = parts[0].replace(' ', '') + ',' + parts[1] @@ -200,24 +257,22 @@ def check_type_comprehensive(data: Union[pd.Series, list, Any]) -> Tuple[str, in parts = cleaned_num.split('.') cleaned_num = parts[0].replace(' ', '') + '.' 
+ parts[1] - # Сначала проверяем int с разделителями тысяч + # int с разделителями (только пробел/запятая, без точки) if int_with_separators_pattern.match(cleaned_num.replace(' ', '')): type_counts['int'] = type_counts.get('int', 0) + 1 found_type = True - # Проверяем обычный int elif int_pattern.match(cleaned_num): type_counts['int'] = type_counts.get('int', 0) + 1 found_type = True - # Проверяем float (уже включает разные форматы) elif float_pattern.match(cleaned_num): type_counts['float'] = type_counts.get('float', 0) + 1 found_type = True # Все остальное - строка if not found_type: - # Одиночные символы обрабатываем отдельно + # Одиночные символы – '-' исключён из bool if len(str_value) == 1: - if str_value in ['+', '-', '1', '0']: + if str_value in ['+', '1', '0']: # '-' больше не считается bool type_counts['bool'] = type_counts.get('bool', 0) + 1 else: type_counts['str'] = type_counts.get('str', 0) + 1 @@ -234,11 +289,9 @@ def check_type_comprehensive(data: Union[pd.Series, list, Any]) -> Tuple[str, in if not valid_types: return 'None', nan_count - # Для одиночного значения возвращаем его тип if is_single_value and len(valid_types) == 1: result_type = list(valid_types.keys())[0] else: - # Для списка/Series находим наиболее частый тип result_type = max(valid_types, key=valid_types.get) # Если есть и int и float - считаем float From be24dd5fec46a7c89fa4e28de90b4322313e05bf Mon Sep 17 00:00:00 2001 From: AldarArmaev Date: Fri, 19 Jun 2026 11:49:46 +0800 Subject: [PATCH 10/10] =?UTF-8?q?=D0=A3=D0=B4=D0=B0=D0=BB=D0=B5=D0=BD?= =?UTF-8?q?=D0=B8=D0=B5=20=D1=81=D0=B8=D0=BC=D0=B2=D0=BE=D0=BB=D0=B0=20%?= =?UTF-8?q?=20=D0=A0=D0=B0=D1=81=D0=BF=D0=BE=D0=B7=D0=BD=D0=B0=D0=B2=D0=B0?= =?UTF-8?q?=D0=BD=D0=B8=D0=B5=20=D0=B4=D0=B0=D1=82=20=D1=82=D0=B8=D0=BF?= =?UTF-8?q?=D0=B0=2012.12.26=20=D0=A2=D0=BE=D1=87=D0=BA=D0=B0=20=D0=B1?= =?UTF-8?q?=D0=BE=D0=BB=D1=8C=D1=88=D0=B5=20=D0=BD=D0=B5=20=D1=81=D1=87?= =?UTF-8?q?=D0=B8=D1=82=D0=B0=D0=B5=D1=82=D1=81=D1=8F=20=D1=80=D0=B0=D0=B7?= 
=?UTF-8?q?=D0=B4=D0=B5=D0=BB=D0=B8=D1=82=D0=B5=D0=BB=D0=B5=D0=BC=20=D1=82?= =?UTF-8?q?=D1=8B=D1=81=D1=8F=D1=87=20=D0=B4=D0=BB=D1=8F=20=D1=86=D0=B5?= =?UTF-8?q?=D0=BB=D1=8B=D1=85=20=D1=87=D0=B8=D1=81=D0=B5=D0=BB=20=D0=9F?= =?UTF-8?q?=D1=80=D0=BE=D0=BF=D1=83=D1=81=D0=BA=D0=B8=20=D0=B8=20None=20?= =?UTF-8?q?=D0=BA=D0=BE=D0=BD=D0=B2=D0=B5=D1=80=D1=82=D0=B8=D1=80=D1=83?= =?UTF-8?q?=D1=8E=D1=82=D1=81=D1=8F=20=D0=B2=20=D1=82=D0=B8=D0=BF=20'None'?= =?UTF-8?q?=20=D0=A3=D0=B1=D1=80=D0=B0=D0=BD=D0=BE=20=D0=B1=D1=83=D0=BB?= =?UTF-8?q?=D0=B5=D0=B2=D0=BE=20=D1=80=D0=B0=D1=81=D0=BF=D0=BE=D0=B7=D0=BD?= =?UTF-8?q?=D0=B0=D0=B2=D0=B0=D0=BD=D0=B8=D0=B5=20[]=20=D0=A3=D0=B1=D1=80?= =?UTF-8?q?=D0=B0=D0=BD=20=D0=BF=D1=80=D0=BE=D1=87=D0=B5=D1=80=D0=BA=20-?= =?UTF-8?q?=20=D0=BA=D0=B0=D0=BA=20false=20=D0=94=D0=BE=D0=B1=D0=B0=D0=B2?= =?UTF-8?q?=D0=BB=D0=B5=D0=BD=D0=B0=20=D0=BE=D0=B1=D1=80=D0=B0=D0=B1=D0=BE?= =?UTF-8?q?=D1=82=D0=BA=D0=B0=20=D1=82=D0=B5=D0=BA=D1=81=D1=82=D0=BE=D0=B2?= =?UTF-8?q?=D1=8B=D1=85=20=D0=BC=D0=BD=D0=BE=D0=B6=D0=B8=D1=82=D0=B5=D0=BB?= =?UTF-8?q?=D0=B5=D0=B9=20(=C2=AB=D1=82=D1=8B=D1=81=C2=BB,=20=C2=ABk=C2=BB?= =?UTF-8?q?,=20=C2=ABmillion=C2=BB=20=D0=B8=20=D1=82.=D0=BF.)=20=D0=A0?= =?UTF-8?q?=D0=B0=D1=81=D1=88=D0=B8=D1=80=D0=B5=D0=BD=20float=5Fpattern=20?= =?UTF-8?q?=D0=B4=D0=BB=D1=8F=20=D1=80=D0=B0=D1=81=D0=BF=D0=BE=D0=B7=D0=BD?= =?UTF-8?q?=D0=B0=D0=B2=D0=B0=D0=BD=D0=B8=D1=8F=201.234=20=D0=BA=D0=B0?= =?UTF-8?q?=D0=BA=20float?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- utils/type_check.py | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/utils/type_check.py b/utils/type_check.py index 81c70c4..a6e5115 100644 --- a/utils/type_check.py +++ b/utils/type_check.py @@ -71,20 +71,10 @@ def clean_value(value: Any) -> str: def extract_number_string(s: str) -> str: -<<<<<<< HEAD """Подготовка строки для проверки на число (удаление валют, %, скобок, текста)""" # Удаляем символы валют и % в 
начале/конце s = re.sub(r'^[$€£¥₽%\s]*', '', s) s = re.sub(r'[$€£¥₽%\s]*$', '', s) -======= - """Подготовка строки для проверки на число (удаление валют, скобок, текста)""" - # Нормализация разных видов минусов и дефисов - s = s.replace('−', '-').replace('–', '-').replace('—', '-') - - # Удаляем символы валют в начале/конце - s = re.sub(r'^[$€£¥₽\s]*', '', s) - s = re.sub(r'[$€£¥₽\s]*$', '', s) ->>>>>>> e465acec6c8c10f8afb1068aeeb0b52163d3835a # Обработка скобок (финансовый формат) if s.startswith('(') and s.endswith(')'): @@ -177,7 +167,7 @@ def check_type_comprehensive(data: Union[pd.Series, list, Any]) -> Tuple[str, in for value in values: if (pd.isna(value) or - (isinstance(value, str) and value.lower() in ['nan', 'na', 'n/a', 'nill', 'none','—','?'])): + (isinstance(value, str) and value.lower() in ['nan', 'na', 'n/a','nill'])): nan_count += 1 continue @@ -331,5 +321,4 @@ def analyze_dataset_parallel(dataset: pd.DataFrame, max_workers: Optional[int] = column_name, column_result = future.result() results[column_name] = column_result - return results - + return results \ No newline at end of file