From e976b55a5c2a83f4f2ab1221ea228c9b8737db4f Mon Sep 17 00:00:00 2001 From: Simeon Wong Date: Thu, 14 Nov 2024 20:32:09 -0500 Subject: [PATCH 01/17] delete ip logs... WARNING UNTESTED! --- 02_activities/assignments/assignment.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index d81e9a77b..001291867 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -33,6 +33,7 @@ unzip rawdata.zip # 6. Repeat the above step for user logs and event logs # 7. For user privacy, remove all files containing IP addresses (files with "ipaddr" in the filename) from ./data/raw and ./data/processed/user_logs +rf -rf ./data # 8. Create a file named ./data/inventory.txt that lists all the files in the subfolders of ./data/processed From ea20676d33161a6f4d0fcd3c4f7aa5360f0f4309 Mon Sep 17 00:00:00 2001 From: Simeon Wong Date: Thu, 14 Nov 2024 20:55:44 -0500 Subject: [PATCH 02/17] initialize README file with company name --- 02_activities/assignments/assignment.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index 001291867..f2bfd22bd 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -11,6 +11,7 @@ set -x mkdir analysis output touch README.md +echo "# Project Name: DSI Consulting Inc." > README.md touch analysis/main.py # download client data From 4638193358114f596e2504d97d5268948ab81638 Mon Sep 17 00:00:00 2001 From: Michelle Wang Date: Tue, 3 Mar 2026 14:37:53 -0500 Subject: [PATCH 03/17] Complete assignment.sh --- 02_activities/assignments/assignment.sh | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index 4b48cec8b..4cf7caf4f 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -28,22 +28,37 @@ unzip -q rawdata.zip # Complete assignment here # 1. Create a directory named data +mkdir data # 2. Move the ./rawdata directory to ./data/raw (eg. move it into ./data and rename it to raw) +mv ./rawdata ./data/raw # 3. List the contents of the ./data/raw directory +ls ./data/raw # 4. Create the directory ./data/processed, +cd data +mkdir processed + # then create the following sub-directories within it: server_logs, user_logs, and event_logs +cd processed +mkdir server_logs user_logs event_logs # 5. Copy all server log files (files with "server" in the name AND a .log extension) from ./data/raw to ./data/processed/server_logs +cp ./data/raw/server*.log ./data/processed/server_logs # 6. Repeat the above step for user logs and event logs +cp ./data/raw/user*.log ./data/processed/user_logs +cp ./data/raw/event*.log ./data/processed/event_logs # 7. For user privacy, remove all files containing IP addresses (files with "ipaddr" in the filename) from ./data/raw and ./data/processed/user_logs +rm ./data/raw/*ipaddr* +rm ./data/processed/user_logs/*ipaddr* # 8. Create a file named ./data/inventory.txt that lists all the files in the subfolders of ./data/processed - +cd ../ +touch inventory.txt +find ./data/processed -type f > inventory.txt ########################################### From 90b6644c3a7b790ab2f5035ea16506c0f131486b Mon Sep 17 00:00:00 2001 From: Michelle Wang Date: Tue, 3 Mar 2026 14:53:52 -0500 Subject: [PATCH 04/17] Completed assignment.sh --- 02_activities/assignments/assignment.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index 4cf7caf4f..c631f1165 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -45,6 +45,7 @@ cd processed mkdir server_logs user_logs event_logs # 5. Copy all server log files (files with "server" in the name AND a .log extension) from ./data/raw to ./data/processed/server_logs +cd ../ cp ./data/raw/server*.log ./data/processed/server_logs # 6. Repeat the above step for user logs and event logs From e4f0854ccaabac4ee71bb27a96810438c4919e4f Mon Sep 17 00:00:00 2001 From: Michelle Wang Date: Tue, 3 Mar 2026 15:06:47 -0500 Subject: [PATCH 05/17] Completed assignment.sh --- 02_activities/assignments/assignment.sh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index c631f1165..aafadd488 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -46,7 +46,7 @@ mkdir server_logs user_logs event_logs # 5. Copy all server log files (files with "server" in the name AND a .log extension) from ./data/raw to ./data/processed/server_logs cd ../ -cp ./data/raw/server*.log ./data/processed/server_logs +cp ./data/raw/*server*.log ./data/processed/server_logs # 6. Repeat the above step for user logs and event logs cp ./data/raw/user*.log ./data/processed/user_logs @@ -57,7 +57,6 @@ rm ./data/raw/*ipaddr* rm ./data/processed/user_logs/*ipaddr* # 8. Create a file named ./data/inventory.txt that lists all the files in the subfolders of ./data/processed -cd ../ touch inventory.txt find ./data/processed -type f > inventory.txt From 097c713725eefe8de1f5645e8ab17b90ac16c376 Mon Sep 17 00:00:00 2001 From: Michelle Wang Date: Tue, 3 Mar 2026 15:28:14 -0500 Subject: [PATCH 06/17] Complete assignment.sh --- 02_activities/assignments/assignment.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index aafadd488..1a1cddefa 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -46,7 +46,7 @@ mkdir server_logs user_logs event_logs # 5. Copy all server log files (files with "server" in the name AND a .log extension) from ./data/raw to ./data/processed/server_logs cd ../ -cp ./data/raw/*server*.log ./data/processed/server_logs +cp -R ./data/raw/*server*.log ./data/processed/server_logs # 6. Repeat the above step for user logs and event logs cp ./data/raw/user*.log ./data/processed/user_logs From 8ae50636a19a008fb9b5387f90781f46bfff0262 Mon Sep 17 00:00:00 2001 From: Michelle Wang Date: Tue, 3 Mar 2026 15:36:13 -0500 Subject: [PATCH 07/17] Completed --- 02_activities/assignments/assignment.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index 1a1cddefa..6622de247 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -46,7 +46,7 @@ mkdir server_logs user_logs event_logs # 5. Copy all server log files (files with "server" in the name AND a .log extension) from ./data/raw to ./data/processed/server_logs cd ../ -cp -R ./data/raw/*server*.log ./data/processed/server_logs +cp -R ./data/raw/server*.log ./data/processed/server_logs # 6. Repeat the above step for user logs and event logs cp ./data/raw/user*.log ./data/processed/user_logs From 761fba7cf7e87ef3d42aa483f6c81ababe6319e7 Mon Sep 17 00:00:00 2001 From: Michelle Wang Date: Tue, 3 Mar 2026 16:06:24 -0500 Subject: [PATCH 08/17] Completed --- 02_activities/assignments/assignment.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index 6622de247..9a36851c8 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -53,8 +53,8 @@ cp ./data/raw/user*.log ./data/processed/user_logs cp ./data/raw/event*.log ./data/processed/event_logs # 7. For user privacy, remove all files containing IP addresses (files with "ipaddr" in the filename) from ./data/raw and ./data/processed/user_logs -rm ./data/raw/*ipaddr* -rm ./data/processed/user_logs/*ipaddr* +rm ./data/raw/ipaddr* +rm ./data/processed/user_logs/ipaddr* # 8. Create a file named ./data/inventory.txt that lists all the files in the subfolders of ./data/processed touch inventory.txt From a1401747e7ff3481a45beacf1534594ac72ab9b7 Mon Sep 17 00:00:00 2001 From: Michelle Wang Date: Tue, 3 Mar 2026 16:11:56 -0500 Subject: [PATCH 09/17] Completed --- 02_activities/assignments/assignment.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index 9a36851c8..0d7abd983 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -46,7 +46,7 @@ mkdir server_logs user_logs event_logs # 5. Copy all server log files (files with "server" in the name AND a .log extension) from ./data/raw to ./data/processed/server_logs cd ../ -cp -R ./data/raw/server*.log ./data/processed/server_logs +cp -R ./data/raw/*server*.log ./data/processed/server_logs # 6. Repeat the above step for user logs and event logs cp ./data/raw/user*.log ./data/processed/user_logs From f59486b9ca4e4d5f5138557d659eab85eca59749 Mon Sep 17 00:00:00 2001 From: Michelle Wang Date: Tue, 3 Mar 2026 16:24:32 -0500 Subject: [PATCH 10/17] Completed --- 02_activities/assignments/assignment.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index 0d7abd983..637d8e7be 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -46,7 +46,7 @@ mkdir server_logs user_logs event_logs # 5. Copy all server log files (files with "server" in the name AND a .log extension) from ./data/raw to ./data/processed/server_logs cd ../ -cp -R ./data/raw/*server*.log ./data/processed/server_logs +cp -i ./data/raw/*server*.log ./data/processed/server_logs # 6. Repeat the above step for user logs and event logs cp ./data/raw/user*.log ./data/processed/user_logs From b54893411649d1572192b1a0136f3751f1aa0907 Mon Sep 17 00:00:00 2001 From: Michelle Wang Date: Tue, 3 Mar 2026 16:33:26 -0500 Subject: [PATCH 11/17] Complete --- 02_activities/assignments/assignment.sh | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index 637d8e7be..066274a21 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -45,20 +45,19 @@ cd processed mkdir server_logs user_logs event_logs # 5. Copy all server log files (files with "server" in the name AND a .log extension) from ./data/raw to ./data/processed/server_logs -cd ../ -cp -i ./data/raw/*server*.log ./data/processed/server_logs +cp ./data/raw/*server*.log ./data/processed/server_logs # 6. Repeat the above step for user logs and event logs -cp ./data/raw/user*.log ./data/processed/user_logs -cp ./data/raw/event*.log ./data/processed/event_logs +cp ./data/raw/*user*.log ./data/processed/user_logs +cp ./data/raw/*event*.log ./data/processed/event_logs # 7. For user privacy, remove all files containing IP addresses (files with "ipaddr" in the filename) from ./data/raw and ./data/processed/user_logs -rm ./data/raw/ipaddr* -rm ./data/processed/user_logs/ipaddr* +rm ./data/raw/*ipaddr* +rm ./data/processed/user_logs/*ipaddr* # 8. Create a file named ./data/inventory.txt that lists all the files in the subfolders of ./data/processed touch inventory.txt -find ./data/processed -type f > inventory.txt +find ./processed -type f > inventory.txt ########################################### From 629c0e6c971df0ff422265124411769e38ddc882 Mon Sep 17 00:00:00 2001 From: Michelle Wang Date: Tue, 3 Mar 2026 16:34:49 -0500 Subject: [PATCH 12/17] Complete --- 02_activities/assignments/assignment.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index 066274a21..f7e0f20df 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -56,6 +56,7 @@ rm ./data/raw/*ipaddr* rm ./data/processed/user_logs/*ipaddr* # 8. Create a file named ./data/inventory.txt that lists all the files in the subfolders of ./data/processed +cd ../ touch inventory.txt find ./processed -type f > inventory.txt From b69da0962c0d5a01c267cc96b3329be672c3c0ff Mon Sep 17 00:00:00 2001 From: Michelle Wang Date: Tue, 3 Mar 2026 16:41:42 -0500 Subject: [PATCH 13/17] Complete --- 02_activities/assignments/assignment.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index f7e0f20df..ae7113303 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -45,7 +45,7 @@ cd processed mkdir server_logs user_logs event_logs # 5. Copy all server log files (files with "server" in the name AND a .log extension) from ./data/raw to ./data/processed/server_logs -cp ./data/raw/*server*.log ./data/processed/server_logs +find ./data/raw -type f -name "*server*.log" -exec cp {} ./data/processed/server_logs/ # 6. Repeat the above step for user logs and event logs cp ./data/raw/*user*.log ./data/processed/user_logs From de47844a0b707313d5ba14ca8e890f4c50eeb2ad Mon Sep 17 00:00:00 2001 From: Michelle Wang Date: Tue, 3 Mar 2026 16:46:17 -0500 Subject: [PATCH 14/17] Complete --- 02_activities/assignments/assignment.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index ae7113303..151cc6be6 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -45,7 +45,7 @@ cd processed mkdir server_logs user_logs event_logs # 5. Copy all server log files (files with "server" in the name AND a .log extension) from ./data/raw to ./data/processed/server_logs -find ./data/raw -type f -name "*server*.log" -exec cp {} ./data/processed/server_logs/ +cp ../raw/*server*.log ./server_logs # 6. Repeat the above step for user logs and event logs cp ./data/raw/*user*.log ./data/processed/user_logs From c18cd7a249b3d7e81a73f3a66e78ebc64403aa98 Mon Sep 17 00:00:00 2001 From: Michelle Wang Date: Tue, 3 Mar 2026 16:47:37 -0500 Subject: [PATCH 15/17] Complete --- 02_activities/assignments/assignment.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index 151cc6be6..bef2d7250 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -48,12 +48,12 @@ mkdir server_logs user_logs event_logs cp ../raw/*server*.log ./server_logs # 6. Repeat the above step for user logs and event logs -cp ./data/raw/*user*.log ./data/processed/user_logs -cp ./data/raw/*event*.log ./data/processed/event_logs +cp ../raw/*user*.log ../user_logs +cp ../*event*.log ../event_logs # 7. For user privacy, remove all files containing IP addresses (files with "ipaddr" in the filename) from ./data/raw and ./data/processed/user_logs -rm ./data/raw/*ipaddr* -rm ./data/processed/user_logs/*ipaddr* +rm ../raw/*ipaddr* +rm ../user_logs/*ipaddr* # 8. Create a file named ./data/inventory.txt that lists all the files in the subfolders of ./data/processed cd ../ From a97ed1b92715d302a5841eac2786469d12655d2c Mon Sep 17 00:00:00 2001 From: Michelle Wang Date: Tue, 3 Mar 2026 16:49:31 -0500 Subject: [PATCH 16/17] Complete --- 02_activities/assignments/assignment.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index bef2d7250..6f23cf7c0 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -48,8 +48,8 @@ mkdir server_logs user_logs event_logs cp ../raw/*server*.log ./server_logs # 6. Repeat the above step for user logs and event logs -cp ../raw/*user*.log ../user_logs -cp ../*event*.log ../event_logs +cp ../raw/*user*.log ./user_logs +cp ../*event*.log ./event_logs # 7. For user privacy, remove all files containing IP addresses (files with "ipaddr" in the filename) from ./data/raw and ./data/processed/user_logs rm ../raw/*ipaddr* From de76c592de94c853f5c5a6ddf86f9d8ea0d0714f Mon Sep 17 00:00:00 2001 From: Michelle Wang Date: Tue, 3 Mar 2026 16:50:52 -0500 Subject: [PATCH 17/17] Complete --- 02_activities/assignments/assignment.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/02_activities/assignments/assignment.sh b/02_activities/assignments/assignment.sh index 6f23cf7c0..e489d7afb 100644 --- a/02_activities/assignments/assignment.sh +++ b/02_activities/assignments/assignment.sh @@ -49,11 +49,11 @@ cp ../raw/*server*.log ./server_logs # 6. Repeat the above step for user logs and event logs cp ../raw/*user*.log ./user_logs -cp ../*event*.log ./event_logs +cp ../raw/*event*.log ./event_logs # 7. For user privacy, remove all files containing IP addresses (files with "ipaddr" in the filename) from ./data/raw and ./data/processed/user_logs rm ../raw/*ipaddr* -rm ../user_logs/*ipaddr* +rm ./user_logs/*ipaddr* # 8. Create a file named ./data/inventory.txt that lists all the files in the subfolders of ./data/processed cd ../