GoogleCloudPlatform · bhandarivijay-png · Dec 25, 2025
diff --git a/...-tower-anomaly-detection-dbt/cell-tower-anomaly-detection-dbt/02-config/terraform/main.tf b/...-tower-anomaly-detection-dbt/cell-tower-anomaly-detection-dbt/02-config/terraform/main.tf
@@ -125,7 +125,7 @@ resource "google_storage_bucket" "gcs-bucket" {
   force_destroy = true
   provisioner "local-exec" {
     interpreter = ["/bin/bash" ,"-c"]
-    command = "gsutil cp '${var.src_customer_data}' '${var.dst_customer_data}'/ && gsutil cp '${var.src_service_data}' '${var.dst_service_data}'/ && gsutil cp '${var.src_telecom_data}' '${var.dst_telecom_data}'/"
+    command = "gcloud storage cp '${var.src_customer_data}' '${var.dst_customer_data}'/ && gcloud storage cp '${var.src_service_data}' '${var.dst_service_data}'/ && gcloud storage cp '${var.src_telecom_data}' '${var.dst_telecom_data}'/"
 
 }
 }

diff --git a/customer_churn_biglake/instructions/04-files-upload.md b/customer_churn_biglake/instructions/04-files-upload.md
@@ -48,7 +48,7 @@ Run the following gcloud command in Cloud Shell to create the bucket to store da
 <hr>
 
 ```
-gsutil mb -p $PROJECT_ID -c STANDARD -l $REGION -b on gs://$BUCKET_CODE
+gcloud storage buckets create gs://$BUCKET_CODE --project=$PROJECT_ID --default-storage-class=STANDARD --location=$REGION --uniform-bucket-level-access
 ```
 
 <br>
@@ -88,7 +88,7 @@ bq show --connection $PROJECT_ID.$REGION.$CONNECTION_ID
 Grant the necessary permissions to the Service Account:<br>
 
 ```
-gsutil iam ch serviceAccount:<your_service_account_id_here>:objectViewer gs://$BUCKET_CODE
+gcloud storage buckets add-iam-policy-binding gs://$BUCKET_CODE --member=serviceAccount:<your_service_account_id_here> --role=roles/storage.objectViewer
 ```
 
 ## 6. Create a BigLake table:

diff --git a/s8s-prerequisites/01-instructions/01-terraform-instructions.md b/s8s-prerequisites/01-instructions/01-terraform-instructions.md
@@ -115,7 +115,7 @@ PROJECT_ID=`gcloud config list --format "value(core.project)" 2>/dev/null`
 PROJECT_NBR=`gcloud projects describe $PROJECT_ID | grep projectNumber | cut -d':' -f2 |  tr -d "'" | xargs`
 cd ~/serverless-spark-workshop/s8s-prerequisites/00-scripts-and-config/terraform
 terraform output > resource-list.txt
-gsutil cp resource-list.txt gs://s8s-code-and-data-bucket-$PROJECT_NBR
+gcloud storage cp resource-list.txt gs://s8s-code-and-data-bucket-$PROJECT_NBR
 ```
 
 ## 3. Roles required for the Hackfest Attendees

diff --git a/s8s-spark-mlops/02-scripts/bash/build-container-image.sh b/s8s-spark-mlops/02-scripts/bash/build-container-image.sh
@@ -163,7 +163,7 @@ echo "Completed Dockerfile creation"
 
 # Download dependencies to be baked into image
 cd $LOCAL_SCRATCH_DIR
-gsutil cp $BQ_CONNECTOR_JAR_URI .
+gcloud storage cp $BQ_CONNECTOR_JAR_URI .
 wget -P . https://repo.anaconda.com/miniconda/Miniconda3-py39_4.10.3-Linux-x86_64.sh
 echo "Completed downloading dependencies"
 

diff --git a/...spark_streaming/instructions/05a_serverless_spark_streaming_gcloud_execution.md b/...spark_streaming/instructions/05a_serverless_spark_streaming_gcloud_execution.md
@@ -126,7 +126,7 @@ BUCKET_CODE=                                        #GCP bucket where our code,
 #### 4.2.2 Command to copy Files.
 
 ```
-gsutil cp gs://$BUCKET_CODE/serverless_spark_streaming/01-datasets/data_files/<<file_name>>  gs://$BUCKET_CODE/serverless_spark_streaming/01-datasets/streaming_data/
+gcloud storage cp gs://$BUCKET_CODE/serverless_spark_streaming/01-datasets/data_files/<<file_name>>  gs://$BUCKET_CODE/serverless_spark_streaming/01-datasets/streaming_data/
 
 ```
 

diff --git a/...park_streaming/instructions/05b_serverless_spark_streaming_console_execution.md b/...park_streaming/instructions/05b_serverless_spark_streaming_console_execution.md
@@ -135,7 +135,7 @@ BUCKET_CODE=                                        #GCP bucket where our code,
 #### 4.2.2 Command to copy Files.
 
 ```
-gsutil cp gs://$BUCKET_CODE/serverless_spark_streaming/01-datasets/data_files/<<file_name>>  gs://$BUCKET_CODE/serverless_spark_streaming/01-datasets/streaming_data/
+gcloud storage cp gs://$BUCKET_CODE/serverless_spark_streaming/01-datasets/data_files/<<file_name>>  gs://$BUCKET_CODE/serverless_spark_streaming/01-datasets/streaming_data/
 
 ```
 

diff --git a/social_media_data_analytics/instructions/04-create-docker-image.md b/social_media_data_analytics/instructions/04-create-docker-image.md
@@ -26,7 +26,7 @@ gcloud auth configure-docker ${REGION}-docker.pkg.dev
 Run the below command in VM.
 
 ```
-gsutil cp \
+gcloud storage cp \
   gs://spark-lib/bigquery/spark-bigquery-with-dependencies_2.12-0.22.2.jar .
 
 ```

diff --git a/social_network_graph/instructions/05-create-docker-image.md b/social_network_graph/instructions/05-create-docker-image.md
@@ -48,7 +48,7 @@ We will use it throughout the lab. <br>
 Run the below command in VM.
 
 ```
-gsutil cp \
+gcloud storage cp \
   gs://spark-lib/bigquery/spark-bigquery-with-dependencies_2.12-0.22.2.jar .
 
 ```

diff --git a/timeseries_forecasting/instructions/05-create-docker-image.md b/timeseries_forecasting/instructions/05-create-docker-image.md
@@ -26,7 +26,7 @@ gcloud auth configure-docker ${REGION}-docker.pkg.dev
 Run the below command in VM.
 
 ```
-gsutil cp \
+gcloud storage cp \
   gs://spark-lib/bigquery/spark-bigquery-with-dependencies_2.12-0.22.2.jar .
 
 ```

diff --git a/wikipedia-page-view-analysis/README.md b/wikipedia-page-view-analysis/README.md
@@ -1,149 +1,149 @@
-# Wikipedia Page Views Analysis from the BigQuery UI powered by Dataproc Serverless Spark
-
-Lab contributed by [TEKsystems](https://www.teksystems.com/en/about-us/partnerships/google-cloud) and Google.
-
-This lab demonstrates how to use the BigQuery UI for running Dataproc Serverless Spark jobs for data analytics.
-
-## 1. Prerequisite
-See [this lab for an example prerequisite set up](https://github.com/GoogleCloudPlatform/serverless-spark-workshop/blob/main/malware_detection/instructions/01-gcp-prerequisites.md) or [these Terraform modules to provision and configure a Serverless Spark environment](https://github.com/anagha-google/ts22-just-enough-terraform-for-da).
-
-## 2. Variables
-
-Paste this into Google Cloud CLI in Cloud Shell after replacing with your values -
-```
-PROJECT_ID=YOUR_PROJECT_ID
-PROJECT_NBR=YOUR_PROJECT_NBR
-BQ_UI_BUCKET_NM=gs://s8s-bigspark-$PROJECT_NBR
-LOCATION=us-central1
-```
-
-## 3. Storage Bucket
-
-A storage bucket is needed, for Serverless Spark. Lets create one-
-```
-gsutil mb -p $PROJECT_ID -c STANDARD -l $LOCATION -b on $BQ_UI_BUCKET_NM
-```
-
-## 4. Needed in the UI
-
-Just the storage bucket created above.
-Note: You will run the lab as your own identity.
-
-## 5. Wikipedia Page Views Analysis - code
-
-```
-# Copyright 2022 Google LLC.
-# SPDX-License-Identifier: Apache-2.0
-
-from pyspark.sql import SparkSession
-from pyspark.ml.feature import StopWordsRemover
-from pyspark.sql import functions as F
-
-spark = SparkSession.builder \
-.appName('Wikipedia-Analytics')\
-.getOrCreate()
-
-# Base dataset in BQ
-bqTableFQN = "bigquery-public-data.wikipedia.pageviews_2019"
-
-# Read base dataset with filters
-wikiPageviewsDF = spark.read \
-.format("bigquery") \
-.option("table", bqTableFQN) \
-.option("filter", "datehour >= '2019-01-01' ") \
-.load()
-
-# Subset the columns
-pageViewsSubsetDF = wikiPageviewsDF \
-.select("title", "wiki", "views") \
-.where("views > 5")
-
-# Cache
-pageViewsSubsetDF.cache()
-
-# Filter to just english
-pageViewsSubsetEnglishDF = pageViewsSubsetDF \
-.where("wiki in ('en', 'en.m')")
-
-# Aggregate by title
-pageViewsSubsetEnglishByTitleDF = pageViewsSubsetEnglishDF \
-.groupBy("title") \
-.agg(F.sum('views').alias('total_views'))
-
-# Order by and print
-pageViewsSubsetEnglishByTitleDF.orderBy('total_views', ascending=False).show(20)
-```
-
-## 6. Lets get started
-
-### 6.1. Navigate to the BQ UI from Cloud Console
-
-![bq-1](images/00-bq-01.png)
-<br>
-
-### 6.2. Click on create
-
-![bq-2](images/00-bq-02.png)  
-<br>
-
-### 6.3. Click on "Compose new PySpark"
-
-![bq-3](images/00-bq-03.png)
-<br>
-
-### 6.4. Click on "More -> PySpark Options"
-
-![bq-4](images/00-bq-04.png)
-<br>
-
-### 6.5. Click on "Browse and select staging folder"
-
-![bq-5](images/00-bq-05.png)  
-<br>
-
-### 6.6. Click on "s8s bigSpark bucket"
-
-![bq-6](images/00-bq-06.png)  
-<br>
-
-![bq-7](images/00-bq-07.png)  
-<br>
-
-### 6.7. Select your network and subnet
-
-![bq-8](images/00-bq-08.png)
-<br>
-
-### 6.8. Click "Save"
-![bq-9](images/00-bq-09.png)  
-<br>
-
-### 6.9. Paste the code snippet above, into the UI text area and click "Run"
-![bq-10](images/00-bq-10.png)
-<br>
-
-### 6.10. Switch to Dataproc UI "Batches" view
-
-You should see a new batch job-
-
-![bq-11](images/00-bq-11.png)
-<br>
-
-### 6.11. Switch back to the BQ UI to view the results
-
-![bq-12](images/00-bq-12.png)
-<br>
-<br>
-![bq-13](images/00-bq-13.png)  
-
-### 6.12. Switch to Dataproc UI "Batches" view to look at the monitoring tile
-You should see the autoscale kick in after 2 minutes
-<br>
-![bq-13](images/00-bq-14.png)  
-<br>
-
-
-##### =====================================================================================================
-##### THIS CONCLUDES THIS LAB
-##### PROGRESS TO NEXT LAB, OR SHUT DOWN RESOURCES
-##### =====================================================================================================
+# Wikipedia Page Views Analysis from the BigQuery UI powered by Dataproc Serverless Spark
+
+Lab contributed by [TEKsystems](https://www.teksystems.com/en/about-us/partnerships/google-cloud) and Google.
+
+This lab demonstrates how to use the BigQuery UI for running Dataproc Serverless Spark jobs for data analytics.
+
+## 1. Prerequisite
+See [this lab for an example prerequisite set up](https://github.com/GoogleCloudPlatform/serverless-spark-workshop/blob/main/malware_detection/instructions/01-gcp-prerequisites.md) or [these Terraform modules to provision and configure a Serverless Spark environment](https://github.com/anagha-google/ts22-just-enough-terraform-for-da).
+
+## 2. Variables
+
+Paste this into Google Cloud CLI in Cloud Shell after replacing with your values -
+```
+PROJECT_ID=YOUR_PROJECT_ID
+PROJECT_NBR=YOUR_PROJECT_NBR
+BQ_UI_BUCKET_NM=gs://s8s-bigspark-$PROJECT_NBR
+LOCATION=us-central1
+```
+
+## 3. Storage Bucket
+
+A storage bucket is needed for Serverless Spark. Let's create one:
+```
+gcloud storage buckets create $BQ_UI_BUCKET_NM --project=$PROJECT_ID --default-storage-class=STANDARD --location=$LOCATION --uniform-bucket-level-access
+```
+
+## 4. Needed in the UI
+
+Just the storage bucket created above.
+Note: You will run the lab as your own identity.
+
+## 5. Wikipedia Page Views Analysis - code
+
+```
+# Copyright 2022 Google LLC.
+# SPDX-License-Identifier: Apache-2.0
+
+from pyspark.sql import SparkSession
+from pyspark.ml.feature import StopWordsRemover
+from pyspark.sql import functions as F
+
+spark = SparkSession.builder \
+.appName('Wikipedia-Analytics')\
+.getOrCreate()
+
+# Base dataset in BQ
+bqTableFQN = "bigquery-public-data.wikipedia.pageviews_2019"
+
+# Read base dataset with filters
+wikiPageviewsDF = spark.read \
+.format("bigquery") \
+.option("table", bqTableFQN) \
+.option("filter", "datehour >= '2019-01-01' ") \
+.load()
+
+# Subset the columns
+pageViewsSubsetDF = wikiPageviewsDF \
+.select("title", "wiki", "views") \
+.where("views > 5")
+
+# Cache
+pageViewsSubsetDF.cache()
+
+# Filter to just english
+pageViewsSubsetEnglishDF = pageViewsSubsetDF \
+.where("wiki in ('en', 'en.m')")
+
+# Aggregate by title
+pageViewsSubsetEnglishByTitleDF = pageViewsSubsetEnglishDF \
+.groupBy("title") \
+.agg(F.sum('views').alias('total_views'))
+
+# Order by and print
+pageViewsSubsetEnglishByTitleDF.orderBy('total_views', ascending=False).show(20)
+```
+
+## 6. Let's get started
+
+### 6.1. Navigate to the BQ UI from Cloud Console
+
+![bq-1](images/00-bq-01.png)
+<br>
+
+### 6.2. Click on create
+
+![bq-2](images/00-bq-02.png)  
+<br>
+
+### 6.3. Click on "Compose new PySpark"
+
+![bq-3](images/00-bq-03.png)
+<br>
+
+### 6.4. Click on "More -> PySpark Options"
+
+![bq-4](images/00-bq-04.png)
+<br>
+
+### 6.5. Click on "Browse and select staging folder"
+
+![bq-5](images/00-bq-05.png)  
+<br>
+
+### 6.6. Click on "s8s bigSpark bucket"
+
+![bq-6](images/00-bq-06.png)  
+<br>
+
+![bq-7](images/00-bq-07.png)  
+<br>
+
+### 6.7. Select your network and subnet
+
+![bq-8](images/00-bq-08.png)
+<br>
+
+### 6.8. Click "Save"
+![bq-9](images/00-bq-09.png)  
+<br>
+
+### 6.9. Paste the code snippet above, into the UI text area and click "Run"
+![bq-10](images/00-bq-10.png)
+<br>
+
+### 6.10. Switch to Dataproc UI "Batches" view
+
+You should see a new batch job-
+
+![bq-11](images/00-bq-11.png)
+<br>
+
+### 6.11. Switch back to the BQ UI to view the results
+
+![bq-12](images/00-bq-12.png)
+<br>
+<br>
+![bq-13](images/00-bq-13.png)  
+
+### 6.12. Switch to Dataproc UI "Batches" view to look at the monitoring tile
+You should see the autoscale kick in after 2 minutes
+<br>
+![bq-14](images/00-bq-14.png)  
+<br>
+
+
+##### =====================================================================================================
+##### THIS CONCLUDES THIS LAB
+##### PROGRESS TO NEXT LAB, OR SHUT DOWN RESOURCES
+##### =====================================================================================================
-Original file line number
+Diff line change
@@ Expand Up / @@ -126,7 +126,7 @@ BUCKET_CODE= #GCP bucket where our code, @@
     #### 4.2.2 Command to copy Files.
     ```
-    gsutil cp gs://$BUCKET_CODE/serverless_spark_streaming/01-datasets/data_files/<<file_name>>  gs://$BUCKET_CODE/serverless_spark_streaming/01-datasets/streaming_data/
+    gcloud storage cp gs://$BUCKET_CODE/serverless_spark_streaming/01-datasets/data_files/<<file_name>>  gs://$BUCKET_CODE/serverless_spark_streaming/01-datasets/streaming_data/
     ```
@@ Expand Down @@