diff --git a/site-sleuth/SEO-rival-insights/lambda_function.py b/site-sleuth/SEO-rival-insights/lambda_function.py
new file mode 100644
index 0000000..4cf612a
--- /dev/null
+++ b/site-sleuth/SEO-rival-insights/lambda_function.py
@@ -0,0 +1,281 @@
+import html
+import json
+import boto3
+import os
+import requests
+import emoji
+import markdown
+from bs4 import BeautifulSoup, Comment
+
+s3_client = boto3.client('s3')
+lambda_client = boto3.client('lambda')
+
+HEADERS = {
+    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:98.0) Gecko/20100101 Firefox/98.0",
+    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
+    "Accept-Language": "en-US,en;q=0.5",
+    "Accept-Encoding": "gzip, deflate",
+    "Connection": "keep-alive",
+    "Upgrade-Insecure-Requests": "1",
+    "Sec-Fetch-Dest": "document",
+    "Sec-Fetch-Mode": "navigate",
+    "Sec-Fetch-Site": "none",
+    "Sec-Fetch-User": "?1",
+    "Cache-Control": "max-age=0",
+}
+
+# Upper bound (seconds) for every outbound HTTP call, so a stalled host
+# cannot consume the whole Lambda timeout.
+REQUEST_TIMEOUT = 30
+
+# Event fields that identify the execution in the WordPress callback.
+REQUIRED_EVENT_FIELDS = ('execution_id', 'user_id', 'product_id', 'token')
+
+
+def handler(event, context):
+    """Compare the user's site with a competitor's and report the result.
+
+    Crawls both URLs from ``event['custom_inputs']``, asks the OpenAI Lambda
+    named by ``PI_OPENAI_FUNCTION`` for an analysis, stores the raw answer in
+    S3 and posts an HTML summary to WordPress. Any failure is reported to
+    WordPress with status ``failed`` and returned as a 500 response.
+    """
+    # Read the identifiers before the try block: the failure report below
+    # needs them even when the error happens on the very first statement.
+    execution_id = event.get('execution_id')
+    user_id = event.get('user_id')
+    product_id = event.get('product_id')
+    token = event.get('token')
+
+    try:
+        for field in REQUIRED_EVENT_FIELDS:
+            if field not in event:
+                raise KeyError(field)
+
+        # Extract environment variables
+        bucket_name = os.environ['PI_EXECUTION_S3_BUCKET_NAME']
+        result_folder = os.environ['PI_RESULTS_FOLDER']
+        openai_function = os.environ['PI_OPENAI_FUNCTION']
+
+        # Extract event data
+        custom_inputs = event['custom_inputs']
+        competitor_url = custom_inputs['competitor_url']
+        user_website_url = custom_inputs['user_website_url']
+
+        # Crawl and extract website content for both the user and competitor websites
+        user_data = crawl_website(user_website_url)
+        competitor_data = crawl_website(competitor_url)
+
+        # Generate a detailed prompt for OpenAI to analyze the competitor
+        prompt = generate_competitor_analysis_prompt(user_data, competitor_data, user_website_url, competitor_url)
+
+        # Invoke the OpenAI function for competitor SEO analysis
+        openai_payload = {
+            "execution_id": execution_id,
+            "user_id": user_id,
+            "product_id": product_id,
+            "service": "chat-gpt-4o-mini",
+            "size": "11x",
+            "prompt": prompt
+        }
+
+        response = lambda_client.invoke(
+            FunctionName=openai_function,
+            InvocationType='RequestResponse',
+            Payload=json.dumps(openai_payload)
+        )
+
+        response_payload = json.load(response['Payload'])
+        status_code = response_payload.get('status_code')
+        if status_code != 200:
+            raise Exception(f"OpenAI chat function returned {status_code} as status code with body {str(response_payload.get('body'))}")
+        function_result = response_payload.get('body')
+
+        if function_result is None:
+            raise Exception("No result from OpenAI chat function")
+
+        # Save the result to S3
+        result_key = f"{result_folder}/{execution_id}/competitor_seo_result.json"
+        s3_client.put_object(Bucket=bucket_name, Key=result_key, Body=json.dumps(function_result, indent=4))
+
+        # Send the result as HTML to the endpoint
+        html_message = generate_html_message(execution_id, user_id, product_id, function_result)
+
+        send_result_to_wordpress({
+            "execution_id": execution_id,
+            "user_id": user_id,
+            "product_id": product_id,
+            "token": token,
+            "status": "successful",
+            "results": html_message
+        })
+
+        return {
+            'statusCode': 200,
+            'body': json.dumps({
+                'message': 'Task executed successfully'
+            })
+        }
+
+    except Exception as e:
+        print(f"Error: {str(e)}")
+
+        try:
+            send_result_to_wordpress({
+                "execution_id": execution_id,
+                "user_id": user_id,
+                "product_id": product_id,
+                "token": token,
+                "status": "failed",
+                "results": f"""
+                <div>
+                    <h2>Error: {html.escape(str(e))}</h2>
+                </div>
+                """
+            })
+        except Exception as report_error:
+            # Reporting is best effort; the caller must still get the 500 below.
+            print(f"Error reporting failure to WordPress: {str(report_error)}")
+
+        return {
+            'statusCode': 500,
+            'body': json.dumps({
+                'message': str(e)
+            })
+        }
+
+
+def _meta_content(soup, name, default):
+    """Return the ``content`` of ``<meta name=...>``, or ``default`` if missing."""
+    tag = soup.find("meta", {"name": name})
+    if tag is None:
+        return default
+    return tag.get("content", default)
+
+
+def crawl_website(url):
+    """Fetch ``url`` and extract its SEO-relevant elements.
+
+    Returns a dict with the title, meta description/keywords, heading texts,
+    image alt texts, link targets and a reduced copy of the HTML.
+    Raises ``requests.RequestException`` on timeouts and HTTP error statuses.
+    """
+    response = requests.get(url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
+    # Do not analyze an error page as if it were the site's content.
+    response.raise_for_status()
+
+    # Parse HTML using BeautifulSoup
+    soup = BeautifulSoup(response.text, 'html.parser')
+
+    # Extract relevant SEO elements
+    # ``.string`` is None when <title> has nested tags, so fall back then too.
+    title = (soup.title.string if soup.title else None) or "No title found"
+    description = _meta_content(soup, "description", "No description found")
+    keywords = _meta_content(soup, "keywords", "No keywords found")
+    h1_tags = [h1.get_text() for h1 in soup.find_all('h1')]
+    h2_tags = [h2.get_text() for h2 in soup.find_all('h2')]
+    h3_tags = [h3.get_text() for h3 in soup.find_all('h3')]
+    alt_texts = [img['alt'] for img in soup.find_all('img') if img.has_attr('alt')]
+    links = [a['href'] for a in soup.find_all('a') if a.has_attr('href')]
+
+    # Remove script, style, and comments
+    for node in soup(["script", "style"]):
+        node.extract()
+    for comment in soup.find_all(string=lambda text: isinstance(text, Comment)):
+        comment.extract()
+
+    # Keep only ``href`` on links and drop every attribute on other tags
+    for tag in soup.find_all(True):
+        if tag.name == 'a' and 'href' in tag.attrs:
+            tag.attrs = {'href': tag.attrs['href']}
+        else:
+            tag.attrs = {}
+
+    return {
+        "title": title,
+        "description": description,
+        "keywords": keywords,
+        "h1_tags": h1_tags,
+        "h2_tags": h2_tags,
+        "h3_tags": h3_tags,
+        "alt_texts": alt_texts,
+        "links": links,
+        "cleaned_html": soup.prettify()  # Reduced HTML content for analysis
+    }
+
+
+def generate_competitor_analysis_prompt(user_data, competitor_data, user_website_url, competitor_url):
+    """Build the OpenAI prompt comparing the user's site with the competitor's."""
+    return f"""
+    You are an expert in SEO analysis. I have provided you with the relevant crawled data from two websites: the user's website ({user_website_url}) and a competitor's website ({competitor_url}).
+    Here is the information for the user's website:
+
+    - Title: {user_data['title']}
+    - Description: {user_data['description']}
+    - Keywords: {user_data['keywords']}
+    - H1 Tags: {', '.join(user_data['h1_tags'])}
+    - H2 Tags: {', '.join(user_data['h2_tags'])}
+    - H3 Tags: {', '.join(user_data['h3_tags'])}
+    - Alt Texts: {', '.join(user_data['alt_texts'])}
+    - Links: {', '.join(user_data['links'])}
+    - Cleaned HTML: ``` {user_data['cleaned_html']} ```
+
+    And here is the information for the competitor's website:
+
+    - Title: {competitor_data['title']}
+    - Description: {competitor_data['description']}
+    - Keywords: {competitor_data['keywords']}
+    - H1 Tags: {', '.join(competitor_data['h1_tags'])}
+    - H2 Tags: {', '.join(competitor_data['h2_tags'])}
+    - H3 Tags: {', '.join(competitor_data['h3_tags'])}
+    - Alt Texts: {', '.join(competitor_data['alt_texts'])}
+    - Links: {', '.join(competitor_data['links'])}
+    - Cleaned HTML: ``` {competitor_data['cleaned_html']} ```
+
+    Please perform a detailed SEO competitor analysis, focusing on the following aspects:
+
+    1. Comparative SEO performance of the user's website and the competitor's website.
+    2. Strengths and weaknesses of both websites in terms of SEO.
+    3. Opportunities for the user to improve and outperform the competitor in SEO.
+    4. Specific recommendations for improving the user's website SEO to gain an edge over the competitor.
+
+    Provide your analysis in a structured format with actionable insights.
+    """
+
+
+def generate_html_message(execution_id, user_id, product_id, response):
+    """Render the OpenAI answer and the execution identifiers as an HTML fragment."""
+    response_text = response['choices'][0]['message']['content']
+    emojized_text = emoji.emojize(response_text)
+    html_str = markdown.markdown(emojized_text)
+    return f"""
+    <div>
+        <h2>SEO Competitor Analysis Result</h2>
+        <p><strong>Execution ID:</strong> {html.escape(str(execution_id))}</p>
+        <p><strong>User ID:</strong> {html.escape(str(user_id))}</p>
+        <p><strong>Product ID:</strong> {html.escape(str(product_id))}</p>
+        <h3>Analysis Results:</h3>
+        <div>
+            {html_str}
+        </div>
+    </div>
+    """
+
+
+def send_result_to_wordpress(result):
+    """POST ``result`` as JSON to the WordPress endpoint and return the reply text.
+
+    Raises ``Exception`` when WordPress answers with anything but HTTP 200.
+    """
+    post_data = json.dumps(result)
+    wordpress_url = 'https://promptintellect.com/wp-json/product-extension/v1/lambda-results'
+
+    # requests derives Content-Length from the body, so only the type is set.
+    headers = {'Content-Type': 'application/json'}
+
+    response = requests.post(wordpress_url, data=post_data, headers=headers, timeout=REQUEST_TIMEOUT)
+
+    if response.status_code != 200:
+        raise Exception(f"Failed to send result to WordPress. Status code: {response.status_code}, Response: {response.text}")
+
+    return response.text
diff --git a/site-sleuth/URL-SEO-analyzer/lambda_function.py b/site-sleuth/URL-SEO-analyzer/lambda_function.py
new file mode 100644
index 0000000..ee1ed95
--- /dev/null
+++ b/site-sleuth/URL-SEO-analyzer/lambda_function.py
@@ -0,0 +1,274 @@
+import html
+import json
+import boto3
+import os
+import requests
+import emoji
+import markdown
+from bs4 import BeautifulSoup, Comment  # Import Comment
+from urllib.request import urlopen
+
+s3_client = boto3.client('s3')
+lambda_client = boto3.client('lambda')
+
+HEADERS = {
+    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:98.0) Gecko/20100101 Firefox/98.0",
+    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8",
+    "Accept-Language": "en-US,en;q=0.5",
+    "Accept-Encoding": "gzip, deflate",
+    "Connection": "keep-alive",
+    "Upgrade-Insecure-Requests": "1",
+    "Sec-Fetch-Dest": "document",
+    "Sec-Fetch-Mode": "navigate",
+    "Sec-Fetch-Site": "none",
+    "Sec-Fetch-User": "?1",
+    "Cache-Control": "max-age=0",
+}
+
+# Upper bound (seconds) for every outbound HTTP call, so a stalled host
+# cannot consume the whole Lambda timeout.
+REQUEST_TIMEOUT = 30
+
+# Event fields that identify the execution in the WordPress callback.
+REQUIRED_EVENT_FIELDS = ('execution_id', 'user_id', 'product_id', 'token')
+
+
+def handler(event, context):
+    """Analyze the SEO of one website and report the result.
+
+    Crawls ``event['custom_inputs']['website_url']``, asks the OpenAI Lambda
+    named by ``PI_OPENAI_FUNCTION`` for an analysis, stores the raw answer in
+    S3 and posts an HTML summary to WordPress. Any failure is reported to
+    WordPress with status ``failed`` and returned as a 500 response.
+    """
+    # Read the identifiers before the try block: the failure report below
+    # needs them even when the error happens on the very first statement.
+    execution_id = event.get('execution_id')
+    user_id = event.get('user_id')
+    product_id = event.get('product_id')
+    token = event.get('token')
+
+    try:
+        for field in REQUIRED_EVENT_FIELDS:
+            if field not in event:
+                raise KeyError(field)
+
+        # Extract environment variables
+        bucket_name = os.environ['PI_EXECUTION_S3_BUCKET_NAME']
+        result_folder = os.environ['PI_RESULTS_FOLDER']
+        openai_function = os.environ['PI_OPENAI_FUNCTION']
+
+        # Extract event data
+        custom_inputs = event['custom_inputs']
+        website_url = custom_inputs['website_url']
+
+        # Crawl and extract website content
+        crawled_data = crawl_website(website_url)
+
+        # Generate a detailed prompt for OpenAI
+        prompt = generate_seo_analysis_prompt(crawled_data, website_url)
+
+        # Invoke the OpenAI function for SEO analysis
+        openai_payload = {
+            "execution_id": execution_id,
+            "user_id": user_id,
+            "product_id": product_id,
+            "service": "chat-gpt-4o-mini",
+            "size": "9x",
+            "prompt": prompt
+        }
+
+        response = lambda_client.invoke(
+            FunctionName=openai_function,
+            InvocationType='RequestResponse',
+            Payload=json.dumps(openai_payload)
+        )
+
+        response_payload = json.load(response['Payload'])
+        status_code = response_payload.get('status_code')
+        if status_code != 200:
+            raise Exception(f"OpenAI chat function returns {status_code} as status code with body {str(response_payload.get('body'))}")
+        function_result = response_payload.get('body')
+
+        if function_result is None:
+            raise Exception("No result from OpenAI chat function")
+
+        # Save the result to S3
+        result_key = f"{result_folder}/{execution_id}/seo_result.json"
+        s3_client.put_object(Bucket=bucket_name, Key=result_key, Body=json.dumps(function_result, indent=4))
+
+        # Send the result as HTML to the endpoint
+        html_message = generate_html_message(execution_id, user_id, product_id, function_result)
+
+        send_result_to_wordpress({
+            "execution_id": execution_id,
+            "user_id": user_id,
+            "product_id": product_id,
+            "token": token,
+            "status": "successful",
+            "results": html_message
+        })
+
+        return {
+            'statusCode': 200,
+            'body': json.dumps({
+                'message': 'Task executed successfully'
+            })
+        }
+
+    except Exception as e:
+        print(f"Error: {str(e)}")
+
+        try:
+            send_result_to_wordpress({
+                "execution_id": execution_id,
+                "user_id": user_id,
+                "product_id": product_id,
+                "token": token,
+                "status": "failed",
+                "results": f"""
+                <div>
+                    <h2>Error: {html.escape(str(e))}</h2>
+                </div>
+                """
+            })
+        except Exception as report_error:
+            # Reporting is best effort; the caller must still get the 500 below.
+            print(f"Error reporting failure to WordPress: {str(report_error)}")
+
+        return {
+            'statusCode': 500,
+            'body': json.dumps({
+                'message': str(e)
+            })
+        }
+
+
+def _meta_content(soup, name, default):
+    """Return the ``content`` of ``<meta name=...>``, or ``default`` if missing."""
+    tag = soup.find("meta", {"name": name})
+    if tag is None:
+        return default
+    return tag.get("content", default)
+
+
+def crawl_website(url):
+    """Fetch ``url`` and extract its SEO-relevant elements.
+
+    Returns a dict with the title, meta description/keywords, heading texts,
+    image alt texts, link targets and a reduced copy of the HTML.
+    Raises ``requests.RequestException`` on timeouts and HTTP error statuses.
+    """
+    response = requests.get(url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
+    # Do not analyze an error page as if it were the site's content.
+    response.raise_for_status()
+
+    # Parse HTML using BeautifulSoup
+    soup = BeautifulSoup(response.text, 'html.parser')
+
+    # Extract relevant SEO elements
+    # ``.string`` is None when <title> has nested tags, so fall back then too.
+    title = (soup.title.string if soup.title else None) or "No title found"
+    description = _meta_content(soup, "description", "No description found")
+    keywords = _meta_content(soup, "keywords", "No keywords found")
+    h1_tags = [h1.get_text() for h1 in soup.find_all('h1')]
+    h2_tags = [h2.get_text() for h2 in soup.find_all('h2')]
+    h3_tags = [h3.get_text() for h3 in soup.find_all('h3')]
+    alt_texts = [img['alt'] for img in soup.find_all('img') if img.has_attr('alt')]
+    links = [a['href'] for a in soup.find_all('a') if a.has_attr('href')]
+
+    # Remove script, style, and comments
+    for node in soup(["script", "style"]):
+        node.extract()
+    for comment in soup.find_all(string=lambda text: isinstance(text, Comment)):
+        comment.extract()
+
+    # Keep only ``href`` on links and drop every attribute on other tags
+    for tag in soup.find_all(True):
+        if tag.name == 'a' and 'href' in tag.attrs:
+            tag.attrs = {'href': tag.attrs['href']}
+        else:
+            tag.attrs = {}
+
+    return {
+        "title": title,
+        "description": description,
+        "keywords": keywords,
+        "h1_tags": h1_tags,
+        "h2_tags": h2_tags,
+        "h3_tags": h3_tags,
+        "alt_texts": alt_texts,
+        "links": links,
+        "cleaned_html": soup.prettify()  # Reduced HTML content for analysis
+    }
+
+
+def generate_seo_analysis_prompt(crawled_data, website_url):
+    """Build the OpenAI prompt asking for an SEO analysis of ``website_url``."""
+    return f"""
+    You are an expert in SEO analysis. I have provided you with the relevant crawled data from the website: {website_url}.
+    Here is the information:
+
+    - Title: {crawled_data['title']}
+    - Description: {crawled_data['description']}
+    - Keywords: {crawled_data['keywords']}
+    - H1 Tags: {', '.join(crawled_data['h1_tags'])}
+    - H2 Tags: {', '.join(crawled_data['h2_tags'])}
+    - H3 Tags: {', '.join(crawled_data['h3_tags'])}
+    - Alt Texts: {', '.join(crawled_data['alt_texts'])}
+    - Links: {', '.join(crawled_data['links'])}
+
+    Here is the cleaned HTML content of the webpage (unnecessary tags and scripts removed):
+    {crawled_data['cleaned_html']}
+
+    Please analyze this website in terms of SEO and provide detailed insights and recommendations. Your analysis should include, but not be limited to, the following:
+
+    1. Overall SEO performance of the website.
+    2. Issues with meta tags, titles, and descriptions.
+    3. Use of headings (H1, H2, H3, etc.) and their effectiveness.
+    4. Keyword optimization and suggestions for improvement.
+    5. Use of alt texts for images and suggestions for improvement.
+    6. Analysis of internal and external links.
+    7. Any missing critical SEO elements.
+    8. Recommendations for improving the website's SEO.
+
+    Provide your analysis in a structured format with actionable insights.
+    """
+
+
+def generate_html_message(execution_id, user_id, product_id, response):
+    """Render the OpenAI answer and the execution identifiers as an HTML fragment."""
+    response_text = response['choices'][0]['message']['content']
+    emojized_text = emoji.emojize(response_text)
+    html_str = markdown.markdown(emojized_text)
+    return f"""
+    <div>
+        <h2>SEO Analysis Result</h2>
+        <p><strong>Execution ID:</strong> {html.escape(str(execution_id))}</p>
+        <p><strong>User ID:</strong> {html.escape(str(user_id))}</p>
+        <p><strong>Product ID:</strong> {html.escape(str(product_id))}</p>
+        <h3>Analysis Results:</h3>
+        <div>
+            {html_str}
+        </div>
+    </div>
+    """
+
+
+def send_result_to_wordpress(result):
+    """POST ``result`` as JSON to the WordPress endpoint and return the reply text.
+
+    Raises ``Exception`` when WordPress answers with anything but HTTP 200.
+    """
+    post_data = json.dumps(result)
+    wordpress_url = 'https://promptintellect.com/wp-json/product-extension/v1/lambda-results'
+
+    # requests derives Content-Length from the body, so only the type is set.
+    headers = {'Content-Type': 'application/json'}
+
+    response = requests.post(wordpress_url, headers=headers, data=post_data, timeout=REQUEST_TIMEOUT)
+
+    if response.status_code != 200:
+        raise Exception(f"Unexpected status code: {response.status_code}, {response.text}")
+
+    return response.text