From 3b458c543f788c2c4372494efd4900d2ab066041 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sat, 6 Dec 2025 10:02:53 +0000
Subject: [PATCH 1/7] Initial plan


From a85251d4d2daca8da71de6a3722867d9803c89f3 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sat, 6 Dec 2025 10:08:03 +0000
Subject: [PATCH 2/7] Update scraper to handle modern DownDetector HTML and
 improve bot protection bypass

Co-authored-by: aaryanrr <73213670+aaryanrr@users.noreply.github.com>
---
 src/Scraper.py | 101 ++++++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 87 insertions(+), 14 deletions(-)

diff --git a/src/Scraper.py b/src/Scraper.py
index 39e3da9..88bcb27 100644
--- a/src/Scraper.py
+++ b/src/Scraper.py
@@ -13,27 +13,89 @@ class URLInstance(object):
 
     def __init__(self, service_name):
         self.url = f"{self.url}" + service_name
+        # Create a session for better connection handling and bot protection bypass
+        self.session = requests.Session()
 
     # Scrape the Status of the Service from the Page
     def get_status(self):
-        header = {
-            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
-                          "Chrome/74.0.3729.169 Safari/537.36",
-            'referer': 'https://www.google.com/'
+        # Updated headers to mimic a real browser more closely
+        headers = {
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
+                          "Chrome/120.0.0.0 Safari/537.36",
+            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
+            "Accept-Language": "en-US,en;q=0.9",
+            "Accept-Encoding": "gzip, deflate, br",
+            "DNT": "1",
+            "Connection": "keep-alive",
+            "Upgrade-Insecure-Requests": "1",
+            "Sec-Fetch-Dest": "document",
+            "Sec-Fetch-Mode": "navigate",
+            "Sec-Fetch-Site": "none",
+            "Cache-Control": "max-age=0",
+            "Referer": "https://www.google.com/"
         }
+        
         try:
-            page = requests.get(self.url, headers=header)
-            soup = BeautifulSoup(page.content, 'html5lib')
-            status = soup.find('div', attrs={'id': 'company'})
-            text = status.find('div', attrs={'class': 'h2 entry-title'})
-            print(text.text.strip())
+            # Use session for better connection handling
+            response = self.session.get(self.url, headers=headers, timeout=10)
+            response.raise_for_status()
+            
+            soup = BeautifulSoup(response.content, 'html5lib')
+            
+            # Try multiple selectors to find the status message (more robust approach)
+            status_text = None
+            
+            # Strategy 1: Try original selector for backward compatibility
+            company_div = soup.find('div', attrs={'id': 'company'})
+            if company_div:
+                text_elem = company_div.find('div', attrs={'class': 'h2 entry-title'})
+                if text_elem:
+                    status_text = text_elem.text.strip()
+            
+            # Strategy 2: Try to find entry-title class directly (more common in modern design)
+            if not status_text:
+                entry_title = soup.find(['h1', 'h2', 'h3', 'div'], class_='entry-title')
+                if entry_title:
+                    status_text = entry_title.text.strip()
+            
+            # Strategy 3: Try to find status-related elements with common class patterns
+            if not status_text:
+                # Look for elements with "status" in class name
+                for elem in soup.find_all(['div', 'span', 'h1', 'h2', 'h3']):
+                    class_names = elem.get('class', [])
+                    if any('status' in str(c).lower() or 'entry-title' in str(c).lower() for c in class_names):
+                        text = elem.text.strip()
+                        # Filter out very short text or common non-status text
+                        if text and len(text) > 5 and not text.lower() in ['status', 'info', 'information']:
+                            status_text = text
+                            break
+            
+            # Strategy 4: Look for the page title as fallback
+            if not status_text:
+                page_title = soup.find('h1')
+                if page_title:
+                    text = page_title.text.strip()
+                    # Only use if it looks like a status message (contains certain keywords)
+                    if any(keyword in text.lower() for keyword in ['problem', 'issue', 'outage', 'down', 'working', 'reports', 'no problems']):
+                        status_text = text
+            
+            if status_text:
+                print(status_text)
+            else:
+                raise Errors.InvalidServiceName(
+                    "Could not find status information. The service name may be invalid or the page structure has changed."
+                )
 
         except AttributeError:
             # Expecting AttributeError if the Name given is Invalid
             # A NoneType object won't have the attribute .text as used above
             raise Errors.InvalidServiceName("Name of the Service is Invalid!")
-        else:
-            pass
+        except requests.exceptions.RequestException as e:
+            print(f"Error fetching the page: {e}")
+            raise Errors.InvalidServiceName("Could not fetch the page. Please check your internet connection and try again.")
+        except Exception as e:
+            print(f"Unexpected error: {e}")
+            raise
 
     # Prints the URL for the Status Page of the Service
     def get_url(self):
@@ -51,12 +113,23 @@ def get_base_url(cls):
 
 # Function to Check the Internet Connection
 def check_connection():
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
+                      "Chrome/120.0.0.0 Safari/537.36"
+    }
     try:
-        requests.get("https://downdetector.com")
+        response = requests.get("https://downdetector.com", headers=headers, timeout=10)
+        response.raise_for_status()
+        print("All Good 👍")
     except ConnectionError:
         print("This program requires an active Internet Connection!")
-    else:
-        print("All Good 👍")
+        quit()
+    except requests.exceptions.Timeout:
+        print("Connection timeout. Please check your internet connection!")
+        quit()
+    except Exception as e:
+        print(f"Connection check failed: {e}")
+        print("Proceeding anyway...")
 
 
 # Function for the Menu

From 89c08813533a21c7b7c1ce9f816187625757a2cd Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sat, 6 Dec 2025 10:09:34 +0000
Subject: [PATCH 3/7] Add CHANGELOG and update README with recent improvements

Co-authored-by: aaryanrr <73213670+aaryanrr@users.noreply.github.com>
---
 CHANGELOG.md | 35 +++++++++++++++++++++++++++++++++++
 README.md    | 10 ++++++++++
 2 files changed, 45 insertions(+)
 create mode 100644 CHANGELOG.md

diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 0000000..3facbd0
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,35 @@
+# Changelog
+
+## [Unreleased] - 2025-12-06
+
+### Fixed
+- Fixed scraping functionality to work with modern DownDetector website structure
+- Improved bot detection bypass with updated headers and session handling
+- Added backward compatibility with original HTML structure
+
+### Changed
+- Updated User-Agent to modern Chrome version (120.0.0.0)
+- Added comprehensive HTTP headers (Accept, Accept-Language, Sec-Fetch-*, etc.)
+- Implemented session-based requests for better connection handling
+- Added timeout (10 seconds) to prevent hanging requests
+
+### Added
+- Multi-strategy HTML parsing:
+  - Strategy 1: Original selector for backward compatibility
+  - Strategy 2: Direct entry-title class lookup (most common)
+  - Strategy 3: Find elements with "status" or "entry-title" in class names
+  - Strategy 4: Fallback to h1 with status-related keywords
+- Better error messages for different failure scenarios
+- Response status validation with `raise_for_status()`
+- Timeout handling for connection issues
+
+### Technical Details
+
+The scraper now attempts multiple strategies to find the status message, making it more resilient to website changes:
+
+1. **Backward Compatibility**: Tries the original `div#company div.h2.entry-title` selector first (the title may be nested at any depth inside `div#company`)
+2. **Modern Structure**: Looks for `.entry-title` class on h1, h2, h3, or div elements
+3. **Class Pattern Matching**: Searches `div`, `span`, `h1`, `h2`, and `h3` elements for the first one whose class names contain "status" or "entry-title" and whose text is longer than 5 characters
+4. **Keyword-Based Fallback**: Uses the first h1 tag on the page, but only if its text contains a status keyword (problem, issue, outage, down, working, reports, no problems)
+
+This multi-layered approach is intended to keep the scraper working with both old and new website structures; if no strategy matches, an `InvalidServiceName` error is raised.
diff --git a/README.md b/README.md
index 6eff795..a149fd9 100644
--- a/README.md
+++ b/README.md
@@ -9,6 +9,16 @@
 > *It scrapes the status message from DownDetector's Website and shows it in the terminal.
 So you don't need to open up the Website for checking the status of a service.*
 
+### Recent Updates (December 2025)
+
+The scraper has been updated to work with the modern DownDetector website structure:
+- ✅ Improved bot detection bypass with modern browser headers
+- ✅ Multi-strategy HTML parsing for resilience against website changes
+- ✅ Better error handling and timeout support
+- ✅ Backward compatible with older website structures
+
+See [CHANGELOG.md](CHANGELOG.md) for detailed changes.
+
 ## CONTRIBUTING
 
 > *For details on how to contribute to this project see [this](https://github.com/aaryanrr/DownDetector-CLI/blob/main/CONTRIBUTING.md)*

From d88a054d5502e5d683ab507413dc4d8ccb508570 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sat, 6 Dec 2025 10:12:23 +0000
Subject: [PATCH 4/7] Address code review comments - refactor for better
 maintainability

Co-authored-by: aaryanrr <73213670+aaryanrr@users.noreply.github.com>
---
 src/Scraper.py | 61 ++++++++++++++++++++++++++------------------------
 1 file changed, 32 insertions(+), 29 deletions(-)

diff --git a/src/Scraper.py b/src/Scraper.py
index 88bcb27..84d17c9 100644
--- a/src/Scraper.py
+++ b/src/Scraper.py
@@ -5,8 +5,26 @@
 
 import src.Errors as Errors
 
+# Constants for HTTP headers to mimic real browser behavior
+USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
 
-# Class for thw URL Instance
+DEFAULT_HEADERS = {
+    "User-Agent": USER_AGENT,
+    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
+    "Accept-Language": "en-US,en;q=0.9",
+    "Accept-Encoding": "gzip, deflate, br",
+    "DNT": "1",
+    "Connection": "keep-alive",
+    "Upgrade-Insecure-Requests": "1",
+    "Sec-Fetch-Dest": "document",
+    "Sec-Fetch-Mode": "navigate",
+    "Sec-Fetch-Site": "none",
+    "Cache-Control": "max-age=0",
+    "Referer": "https://www.google.com/"
+}
+
+
+# Class for the URL Instance
 class URLInstance(object):
 
     url = "https://downdetector.com/status/"
@@ -18,26 +36,9 @@ def __init__(self, service_name):
 
     # Scrape the Status of the Service from the Page
     def get_status(self):
-        # Updated headers to mimic a real browser more closely
-        headers = {
-            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
-                          "Chrome/120.0.0.0 Safari/537.36",
-            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
-            "Accept-Language": "en-US,en;q=0.9",
-            "Accept-Encoding": "gzip, deflate, br",
-            "DNT": "1",
-            "Connection": "keep-alive",
-            "Upgrade-Insecure-Requests": "1",
-            "Sec-Fetch-Dest": "document",
-            "Sec-Fetch-Mode": "navigate",
-            "Sec-Fetch-Site": "none",
-            "Cache-Control": "max-age=0",
-            "Referer": "https://www.google.com/"
-        }
-        
         try:
             # Use session for better connection handling
-            response = self.session.get(self.url, headers=headers, timeout=10)
+            response = self.session.get(self.url, headers=DEFAULT_HEADERS, timeout=10)
             response.raise_for_status()
             
             soup = BeautifulSoup(response.content, 'html5lib')
@@ -61,22 +62,28 @@ def get_status(self):
             # Strategy 3: Try to find status-related elements with common class patterns
             if not status_text:
                 # Look for elements with "status" in class name
-                for elem in soup.find_all(['div', 'span', 'h1', 'h2', 'h3']):
+                def is_valid_status_element(elem):
                     class_names = elem.get('class', [])
                     if any('status' in str(c).lower() or 'entry-title' in str(c).lower() for c in class_names):
                         text = elem.text.strip()
                         # Filter out very short text or common non-status text
-                        if text and len(text) > 5 and not text.lower() in ['status', 'info', 'information']:
-                            status_text = text
-                            break
+                        if text and len(text) > 5 and text.lower() not in ['status', 'info', 'information']:
+                            return True
+                    return False
+                
+                status_elem = next((elem for elem in soup.find_all(['div', 'span', 'h1', 'h2', 'h3']) 
+                                   if is_valid_status_element(elem)), None)
+                if status_elem:
+                    status_text = status_elem.text.strip()
             
             # Strategy 4: Look for the page title as fallback
             if not status_text:
                 page_title = soup.find('h1')
                 if page_title:
                     text = page_title.text.strip()
+                    text_lower = text.lower()
                     # Only use if it looks like a status message (contains certain keywords)
-                    if any(keyword in text.lower() for keyword in ['problem', 'issue', 'outage', 'down', 'working', 'reports', 'no problems']):
+                    if any(keyword in text_lower for keyword in ['problem', 'issue', 'outage', 'down', 'working', 'reports', 'no problems']):
                         status_text = text
             
             if status_text:
@@ -113,12 +120,8 @@ def get_base_url(cls):
 
 # Function to Check the Internet Connection
 def check_connection():
-    headers = {
-        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
-                      "Chrome/120.0.0.0 Safari/537.36"
-    }
     try:
-        response = requests.get("https://downdetector.com", headers=headers, timeout=10)
+        response = requests.get("https://downdetector.com", headers={"User-Agent": USER_AGENT}, timeout=10)
         response.raise_for_status()
         print("All Good 👍")
     except ConnectionError:

From d3314d5f54cc5c171ff8524b5ad40e9b4eaa4428 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sat, 6 Dec 2025 10:16:54 +0000
Subject: [PATCH 5/7] Address final code review comments - add NetworkError
 exception and extract constants

Co-authored-by: aaryanrr <73213670+aaryanrr@users.noreply.github.com>
---
 Main.py        |  8 +++++++-
 src/Errors.py  |  5 +++++
 src/Scraper.py | 11 +++++------
 3 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/Main.py b/Main.py
index 11c952e..7bbef7a 100644
--- a/Main.py
+++ b/Main.py
@@ -1,4 +1,5 @@
 from src.Scraper import URLInstance, menu, check_connection
+import src.Errors as Errors
 
 # Checking the Internet Connection on Startup
 print("Checking Internet Connection..")
@@ -22,7 +23,12 @@
 
     option = int(input("Enter the Option Number: "))
     if option == 1:
-        instance.get_status()
+        try:
+            instance.get_status()
+        except Errors.InvalidServiceName as e:
+            print(f"Service Error: {e}")
+        except Errors.NetworkError as e:
+            print(f"Network Error: {e}")
     elif option == 2:
         instance.open_url()
     elif option == 3:
diff --git a/src/Errors.py b/src/Errors.py
index f6f97cb..1aa3345 100644
--- a/src/Errors.py
+++ b/src/Errors.py
@@ -3,3 +3,8 @@
 class InvalidServiceName(Exception):
     # Raised when the Service Name entered is Invalid
     pass
+
+
+class NetworkError(Exception):
+    # Raised when there are network/connection issues
+    pass
diff --git a/src/Scraper.py b/src/Scraper.py
index 84d17c9..0308206 100644
--- a/src/Scraper.py
+++ b/src/Scraper.py
@@ -23,6 +23,9 @@
     "Referer": "https://www.google.com/"
 }
 
+# Minimum length for status text to be considered valid
+MIN_STATUS_TEXT_LENGTH = 5
+
 
 # Class for the URL Instance
 class URLInstance(object):
@@ -67,7 +70,7 @@ def is_valid_status_element(elem):
                     if any('status' in str(c).lower() or 'entry-title' in str(c).lower() for c in class_names):
                         text = elem.text.strip()
                         # Filter out very short text or common non-status text
-                        if text and len(text) > 5 and text.lower() not in ['status', 'info', 'information']:
+                        if text and len(text) > MIN_STATUS_TEXT_LENGTH and text.lower() not in ['status', 'info', 'information']:
                             return True
                     return False
                 
@@ -93,13 +96,9 @@ def is_valid_status_element(elem):
                     "Could not find status information. The service name may be invalid or the page structure has changed."
                 )
 
-        except AttributeError:
-            # Expecting AttributeError if the Name given is Invalid
-            # A NoneType object won't have the attribute .text as used above
-            raise Errors.InvalidServiceName("Name of the Service is Invalid!")
         except requests.exceptions.RequestException as e:
             print(f"Error fetching the page: {e}")
-            raise Errors.InvalidServiceName("Could not fetch the page. Please check your internet connection and try again.")
+            raise Errors.NetworkError("Could not fetch the page. Please check your internet connection and try again.")
         except Exception as e:
             print(f"Unexpected error: {e}")
             raise

From 0a12ffffbc54b95a6d92858779754b526c1ee86f Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sat, 6 Dec 2025 10:19:04 +0000
Subject: [PATCH 6/7] Fix ConnectionError import to use specific requests
 exception

Co-authored-by: aaryanrr <73213670+aaryanrr@users.noreply.github.com>
---
 src/Scraper.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/src/Scraper.py b/src/Scraper.py
index 0308206..5dd2119 100644
--- a/src/Scraper.py
+++ b/src/Scraper.py
@@ -1,6 +1,5 @@
 import webbrowser
 import requests
-from requests.exceptions import ConnectionError
 from bs4 import BeautifulSoup
 
 import src.Errors as Errors
@@ -123,7 +122,7 @@ def check_connection():
         response = requests.get("https://downdetector.com", headers={"User-Agent": USER_AGENT}, timeout=10)
         response.raise_for_status()
         print("All Good 👍")
-    except ConnectionError:
+    except requests.exceptions.ConnectionError:
         print("This program requires an active Internet Connection!")
         quit()
     except requests.exceptions.Timeout:

From e16e998bdbfab6295a922863a103060bd117360a Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Sun, 7 Dec 2025 20:12:15 +0000
Subject: [PATCH 7/7] Improve error message to be more helpful and actionable
 for users

Co-authored-by: aaryanrr <73213670+aaryanrr@users.noreply.github.com>
---
 src/Scraper.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/src/Scraper.py b/src/Scraper.py
index 5dd2119..c9da606 100644
--- a/src/Scraper.py
+++ b/src/Scraper.py
@@ -92,7 +92,9 @@ def is_valid_status_element(elem):
                 print(status_text)
             else:
                 raise Errors.InvalidServiceName(
-                    "Could not find status information. The service name may be invalid or the page structure has changed."
+                    f"Unable to find status for '{self.url.split('/')[-1]}'. "
+                    "Please verify the service name is correct (e.g., 'facebook', 'twitter', 'instagram'). "
+                    "Visit https://downdetector.com to confirm the service exists."
                 )
 
         except requests.exceptions.RequestException as e: