aaryanrr · aaryanrr · Dec 7, 2025 · Dec 6, 2025 · Dec 6, 2025 · Dec 6, 2025
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -0,0 +1,35 @@
+# Changelog
+
+## [Unreleased] - 2025-12-06
+
+### Fixed
+- Fixed scraping functionality to work with the modern DownDetector website structure
+- Improved bot detection bypass with updated headers and session handling
+- Added backward compatibility with original HTML structure
+
+### Changed
+- Updated User-Agent to a modern Chrome version (120.0.0.0)
+- Added comprehensive HTTP headers (Accept, Accept-Language, Sec-Fetch-*, etc.)
+- Implemented session-based requests for better connection handling
+- Added timeout (10 seconds) to prevent hanging requests
+
+### Added
+- Multi-strategy HTML parsing:
+  - Strategy 1: Original selector for backward compatibility
+  - Strategy 2: Direct entry-title class lookup (most common)
+  - Strategy 3: Find elements with "status" or "entry-title" in class names
+  - Strategy 4: Fallback to h1 with status-related keywords
+- Better error messages for different failure scenarios
+- Response status validation with `raise_for_status()`
+- Timeout handling for connection issues
+
+### Technical Details
+
+The scraper now attempts multiple strategies to find the status message, making it more resilient to website changes:
+
+1. **Backward Compatibility**: Tries the original `div#company > div.h2.entry-title` selector first
+2. **Modern Structure**: Looks for `.entry-title` class on h1, h2, h3, or div elements
+3. **Class Pattern Matching**: Searches for elements with "status" or "entry-title" in their class names
+4. **Keyword-Based Fallback**: Uses h1 tags containing status keywords (problem, issue, outage, down, working, reports, no problems)
+
+This multi-layered approach ensures the scraper works with both old and new website structures.
diff --git a/Main.py b/Main.py
@@ -1,4 +1,5 @@
 from src.Scraper import URLInstance, menu, check_connection
+import src.Errors as Errors
 
 # Checking the Internet Connection on Startup
 print("Checking Internet Connection..")
@@ -22,7 +23,12 @@
 
     option = int(input("Enter the Option Number: "))
     if option == 1:
-        instance.get_status()
+        try:
+            instance.get_status()
+        except Errors.InvalidServiceName as e:
+            print(f"Service Error: {e}")
+        except Errors.NetworkError as e:
+            print(f"Network Error: {e}")
     elif option == 2:
         instance.open_url()
     elif option == 3:

diff --git a/README.md b/README.md
@@ -9,6 +9,16 @@
 > *It scrapes the status message from DownDetector's Website and shows it in the terminal.
 So you don't need to open up the Website for checking the status of a service.*
 
+### Recent Updates (December 2025)
+
+The scraper has been updated to work with the modern DownDetector website structure:
+- ✅ Improved bot detection bypass with modern browser headers
+- ✅ Multi-strategy HTML parsing for resilience against website changes
+- ✅ Better error handling and timeout support
+- ✅ Backward compatible with older website structures
+
+See [CHANGELOG.md](CHANGELOG.md) for detailed changes.
+
 ## CONTRIBUTING
 
 > *For details on how to contribute to this project see [this](https://github.com/aaryanrr/DownDetector-CLI/blob/main/CONTRIBUTING.md)*
diff --git a/src/Errors.py b/src/Errors.py
@@ -3,3 +3,8 @@
 class InvalidServiceName(Exception):
     # Raised when the Service Name entered is Invalid
     pass
+
+
+class NetworkError(Exception):
+    # Raised when there are network/connection issues
+    pass
diff --git a/src/Scraper.py b/src/Scraper.py
@@ -1,39 +1,108 @@
 import webbrowser
 import requests
-from requests.exceptions import ConnectionError
 from bs4 import BeautifulSoup
 
 import src.Errors as Errors
 
+# Constants for HTTP headers to mimic real browser behavior
+USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
 
-# Class for thw URL Instance
+DEFAULT_HEADERS = {
+    "User-Agent": USER_AGENT,
+    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
+    "Accept-Language": "en-US,en;q=0.9",
+    "Accept-Encoding": "gzip, deflate, br",
+    "DNT": "1",
+    "Connection": "keep-alive",
+    "Upgrade-Insecure-Requests": "1",
+    "Sec-Fetch-Dest": "document",
+    "Sec-Fetch-Mode": "navigate",
+    "Sec-Fetch-Site": "none",
+    "Cache-Control": "max-age=0",
+    "Referer": "https://www.google.com/"
+}
+
+# Status text must be longer than this many characters to be considered valid
+MIN_STATUS_TEXT_LENGTH = 5
+
+
+# Class for the URL Instance
 class URLInstance(object):
 
     url = "https://downdetector.com/status/"
 
     def __init__(self, service_name):
         self.url = f"{self.url}" + service_name
+        # Create a session for better connection handling and bot protection bypass
+        self.session = requests.Session()
 
     # Scrape the Status of the Service from the Page
     def get_status(self):
-        header = {
-            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) "
-                          "Chrome/74.0.3729.169 Safari/537.36",
-            'referer': 'https://www.google.com/'
-        }
         try:
-            page = requests.get(self.url, headers=header)
-            soup = BeautifulSoup(page.content, 'html5lib')
-            status = soup.find('div', attrs={'id': 'company'})
-            text = status.find('div', attrs={'class': 'h2 entry-title'})
-            print(text.text.strip())
-
-        except AttributeError:
-            # Expecting AttributeError if the Name given is Invalid
-            # A NoneType object won't have the attribute .text as used above
-            raise Errors.InvalidServiceName("Name of the Service is Invalid!")
-        else:
-            pass
+            # Use session for better connection handling
+            response = self.session.get(self.url, headers=DEFAULT_HEADERS, timeout=10)
+            response.raise_for_status()
+
+            soup = BeautifulSoup(response.content, 'html5lib')
+
+            # Try multiple selectors to find the status message (more robust approach)
+            status_text = None
+
+            # Strategy 1: Try original selector for backward compatibility
+            company_div = soup.find('div', attrs={'id': 'company'})
+            if company_div:
+                text_elem = company_div.find('div', attrs={'class': 'h2 entry-title'})
+                if text_elem:
+                    status_text = text_elem.text.strip()
+
+            # Strategy 2: Try to find entry-title class directly (more common in modern design)
+            if not status_text:
+                entry_title = soup.find(['h1', 'h2', 'h3', 'div'], class_='entry-title')
+                if entry_title:
+                    status_text = entry_title.text.strip()
+
+            # Strategy 3: Try to find status-related elements with common class patterns
+            if not status_text:
+                # Look for elements with "status" or "entry-title" in a class name
+                def is_valid_status_element(elem):
+                    class_names = elem.get('class', [])
+                    if any('status' in str(c).lower() or 'entry-title' in str(c).lower() for c in class_names):
+                        text = elem.text.strip()
+                        # Filter out very short text or common non-status text
+                        if text and len(text) > MIN_STATUS_TEXT_LENGTH and text.lower() not in ['status', 'info', 'information']:
+                            return True
+                    return False
+
+                status_elem = next((elem for elem in soup.find_all(['div', 'span', 'h1', 'h2', 'h3']) 
+                                   if is_valid_status_element(elem)), None)
+                if status_elem:
+                    status_text = status_elem.text.strip()
+
+            # Strategy 4: Fall back to the first <h1> if it contains a status keyword
+            if not status_text:
+                page_title = soup.find('h1')
+                if page_title:
+                    text = page_title.text.strip()
+                    text_lower = text.lower()
+                    # Only use if it looks like a status message (contains certain keywords)
+                    if any(keyword in text_lower for keyword in ['problem', 'issue', 'outage', 'down', 'working', 'reports', 'no problems']):
+                        status_text = text
+
+            if status_text:
+                print(status_text)
+            else:
+                raise Errors.InvalidServiceName(
+                    f"Unable to find status for '{self.url.split('/')[-1]}'. "
+                    "Please verify the service name is correct (e.g., 'facebook', 'twitter', 'instagram'). "
+                    "Visit https://downdetector.com to confirm the service exists."
+                )
+
+        except requests.exceptions.RequestException as e:
+            print(f"Error fetching the page: {e}")
+            raise Errors.NetworkError("Could not fetch the page. Please check your internet connection and try again.")
+        except Exception as e:
+            print(f"Unexpected error: {e}")
+            raise
 
     # Prints the URL for the Status Page of the Service
     def get_url(self):
@@ -52,11 +121,18 @@ def get_base_url(cls):
 # Function to Check the Internet Connection
 def check_connection():
     try:
-        requests.get("https://downdetector.com")
-    except ConnectionError:
-        print("This program requires an active Internet Connection!")
-    else:
+        response = requests.get("https://downdetector.com", headers={"User-Agent": USER_AGENT}, timeout=10)
+        response.raise_for_status()
         print("All Good 👍")
+    except requests.exceptions.ConnectionError:
+        print("This program requires an active Internet Connection!")
+        quit()
+    except requests.exceptions.Timeout:
+        print("Connection timeout. Please check your internet connection!")
+        quit()
+    except Exception as e:
+        print(f"Connection check failed: {e}")
+        print("Proceeding anyway...")
 
 
 # Function for the Menu