diff --git a/README.md b/README.md
index 9cf3c50..ce5f5e1 100644
--- a/README.md
+++ b/README.md
@@ -127,6 +127,29 @@ proxy = FreeProxy(country_id=['US', 'BR'], timeout=0.3, rand=True).get()
If there are no working proxies with provided parameters script raises `FreeProxyException` with `There are no working proxies at this time.` message.
+- **Async usage**
+```
+import asyncio
+from fp import FreeProxy
+
+async def get_with_params():
+ proxy = FreeProxy(country_id=['US'], timeout=1.0, rand=True)
+ working_proxy = await proxy.get()
+ print(f"Working proxy: {working_proxy}")
+
+# or
+
+async def get_with_params():
+ working_proxy = await FreeProxy().get()
+ print(f"Working proxy: {working_proxy}")
+
+
+asyncio.run(get_with_params())
+
+
+```
+
+
## CHANGELOG
---
diff --git a/fp/.gitignore b/fp/.gitignore
new file mode 100644
index 0000000..3ca93a1
--- /dev/null
+++ b/fp/.gitignore
@@ -0,0 +1,169 @@
+# Created by .ignore support plugin (hsz.mobi)
+### Python template
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+### JetBrains template
+# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm
+# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839
+
+# User-specific stuff
+.idea/**/workspace.xml
+.idea/**/tasks.xml
+.idea/**/usage.statistics.xml
+.idea/**/dictionaries
+.idea/**/shelf
+
+# Sensitive or high-churn files
+.idea/**/dataSources/
+.idea/**/dataSources.ids
+.idea/**/dataSources.local.xml
+.idea/**/sqlDataSources.xml
+.idea/**/dynamic.xml
+.idea/**/uiDesigner.xml
+.idea/**/dbnavigator.xml
+
+# Gradle
+.idea/**/gradle.xml
+.idea/**/libraries
+
+# Gradle and Maven with auto-import
+# When using Gradle or Maven with auto-import, you should exclude module files,
+# since they will be recreated, and may cause churn. Uncomment if using
+# auto-import.
+# .idea/modules.xml
+# .idea/*.iml
+# .idea/modules
+
+# CMake
+cmake-build-*/
+
+# Mongo Explorer plugin
+.idea/**/mongoSettings.xml
+
+# File-based project format
+*.iws
+
+# IntelliJ
+out/
+
+# mpeltonen/sbt-idea plugin
+.idea_modules/
+
+# JIRA plugin
+atlassian-ide-plugin.xml
+
+# Cursive Clojure plugin
+.idea/replstate.xml
+
+# Crashlytics plugin (for Android Studio and IntelliJ)
+com_crashlytics_export_strings.xml
+crashlytics.properties
+crashlytics-build.properties
+fabric.properties
+
+# Editor-based Rest Client
+.idea/httpRequests
+.idea/
+.vscode/
diff --git a/fp/fp.py b/fp/fp.py
index 7614665..e3f73b0 100644
--- a/fp/fp.py
+++ b/fp/fp.py
@@ -1,22 +1,23 @@
#!/usr/bin/env python3
+import asyncio
import random
+import re
+import aiohttp
import lxml.html as lh
-import requests
-
from fp.errors import FreeProxyException
class FreeProxy:
- '''
+ """
FreeProxy class scrapes proxies from ,
, ,
- and and checks if proxy is working.
- There is possibility to filter proxies by country and acceptable timeout.
- You can also randomize list of proxies from where script would get first
+ and and checks if proxy is working.
+ There is possibility to filter proxies by country and acceptable timeout.
+ You can also randomize list of proxies from where script would get first
working proxy.
- '''
+ """
def __init__(self, country_id=None, timeout=0.5, rand=False, anonym=False, elite=False, google=None, https=False):
self.country_id = country_id
@@ -27,17 +28,21 @@ def __init__(self, country_id=None, timeout=0.5, rand=False, anonym=False, elite
self.google = google
self.schema = 'https' if https else 'http'
- def get_proxy_list(self, repeat):
+ async def get_proxy_list(self, repeat):
try:
- page = requests.get(self.__website(repeat))
- doc = lh.fromstring(page.content)
- except requests.exceptions.RequestException as e:
- raise FreeProxyException(
- f'Request to {self.__website(repeat)} failed') from e
+ async with aiohttp.ClientSession() as session:
+ async with session.get(self.__website(repeat)) as response:
+ response.raise_for_status()
+ content = await response.text()
+ doc = lh.fromstring(content)
+ except aiohttp.ClientError as e:
+ raise FreeProxyException(f'Request to {self.__website(repeat)} failed') from e
+
try:
tr_elements = doc.xpath('//*[@id="list"]//tr')
- return [f'{tr_elements[i][0].text_content()}:{tr_elements[i][1].text_content()}'
- for i in range(1, len(tr_elements)) if self.__criteria(tr_elements[i])]
+ proxies = [f'{tr_elements[i][0].text_content()}:{tr_elements[i][1].text_content()}'
+ for i in range(1, len(tr_elements)) if self.__criteria(tr_elements[i])]
+ return proxies
except Exception as e:
raise FreeProxyException('Failed to get list of proxies') from e
@@ -57,7 +62,7 @@ def __criteria(self, row_elements):
elite_criteria = True if not self.elite else 'elite' in row_elements[4].text_content(
)
anonym_criteria = True if (
- not self.anonym) or self.elite else 'anonymous' == row_elements[4].text_content()
+ not self.anonym) or self.elite else 'anonymous' == row_elements[4].text_content()
switch = {'yes': True, 'no': False}
google_criteria = True if self.google is None else self.google == switch.get(
row_elements[5].text_content())
@@ -65,31 +70,42 @@ def __criteria(self, row_elements):
).lower() == 'yes'
return country_criteria and elite_criteria and anonym_criteria and google_criteria and https_criteria
- def get(self, repeat=False):
- '''Returns a working proxy that matches the specified parameters.'''
- proxy_list = self.get_proxy_list(repeat)
+ async def get(self, repeat=False):
+ """Returns a working proxy that matches the specified parameters."""
+ proxy_list = await self.get_proxy_list(repeat)
if self.random:
random.shuffle(proxy_list)
working_proxy = None
- for proxy_address in proxy_list:
- proxies = {self.schema: f'http://{proxy_address}'}
- try:
- working_proxy = self.__check_if_proxy_is_working(proxies)
- if working_proxy:
- return working_proxy
- except requests.exceptions.RequestException:
- continue
+ async with aiohttp.ClientSession() as session:
+ for proxy_address in proxy_list:
+ proxies = {self.schema: f'http://{proxy_address}'}
+ try:
+ working_proxy = await self.__check_if_proxy_is_working(session, proxies)
+ if working_proxy:
+ return working_proxy
+ except aiohttp.ClientError:
+ continue
if not working_proxy and not repeat:
if self.country_id is not None:
self.country_id = None
- return self.get(repeat=True)
- raise FreeProxyException(
- 'There are no working proxies at this time.')
+ return await self.get(repeat=True)
+ raise FreeProxyException('There are no working proxies at this time.')
- def __check_if_proxy_is_working(self, proxies):
+ async def __check_if_proxy_is_working(self, session, proxies):
url = f'{self.schema}://www.google.com'
- ip = proxies[self.schema].split(':')[1][2:]
- with requests.get(url, proxies=proxies, timeout=self.timeout, stream=True) as r:
- if r.raw.connection.sock and r.raw.connection.sock.getpeername()[0] == ip:
- return proxies[self.schema]
- return
+ try:
+ async with session.get(url, proxy=proxies[self.schema], timeout=self.timeout) as response:
+ if response.status == 200:
+ if response.connection:
+ pattern = r"URL\('(.+?)'\)"
+ match = re.search(pattern, str(response.connection))
+ if match:
+ return proxies[self.schema]
+ else:
+ pass
+ except asyncio.TimeoutError:
+ pass
+ except aiohttp.ClientError:
+ pass
+
+ return None
diff --git a/requirements.txt b/requirements.txt
index e295a70..092d12d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,13 @@
-lxml==4.9.1
-pip-chill==0.1.8
-requests==2.21.0
+aiohttp==3.8.4
+aiosignal==1.3.1
+async-timeout==4.0.2
+attrs==23.1.0
+certifi==2023.5.7
+charset-normalizer==3.1.0
+frozenlist==1.3.3
+idna==3.4
+lxml==4.9.2
+multidict==6.0.4
+soupsieve==2.4.1
+urllib3==2.0.2
+yarl==1.9.2