Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,29 @@ proxy = FreeProxy(country_id=['US', 'BR'], timeout=0.3, rand=True).get()

If there are no working proxies matching the provided parameters, the script raises `FreeProxyException` with the message `There are no working proxies at this time.`

- **Async usage**
```
import asyncio
from fp import FreeProxy

async def get_with_params():
    proxy = FreeProxy(country_id=['US'], timeout=1.0, rand=True)
    working_proxy = await proxy.get()
    print(f"Working proxy: {working_proxy}")

# or, using the default parameters:

async def get_with_params():
    working_proxy = await FreeProxy().get()
    print(f"Working proxy: {working_proxy}")


asyncio.run(get_with_params())


```


## CHANGELOG

---
Expand Down
169 changes: 169 additions & 0 deletions fp/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,169 @@
# Created by .ignore support plugin (hsz.mobi)
### Python template
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
target/

# Jupyter Notebook
.ipynb_checkpoints

# pyenv
.python-version

# celery beat schedule file
celerybeat-schedule

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
### JetBrains template
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio and WebStorm
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839

# User-specific stuff
.idea/**/workspace.xml
.idea/**/tasks.xml
.idea/**/usage.statistics.xml
.idea/**/dictionaries
.idea/**/shelf

# Sensitive or high-churn files
.idea/**/dataSources/
.idea/**/dataSources.ids
.idea/**/dataSources.local.xml
.idea/**/sqlDataSources.xml
.idea/**/dynamic.xml
.idea/**/uiDesigner.xml
.idea/**/dbnavigator.xml

# Gradle
.idea/**/gradle.xml
.idea/**/libraries

# Gradle and Maven with auto-import
# When using Gradle or Maven with auto-import, you should exclude module files,
# since they will be recreated, and may cause churn. Uncomment if using
# auto-import.
# .idea/modules.xml
# .idea/*.iml
# .idea/modules

# CMake
cmake-build-*/

# Mongo Explorer plugin
.idea/**/mongoSettings.xml

# File-based project format
*.iws

# IntelliJ
out/

# mpeltonen/sbt-idea plugin
.idea_modules/

# JIRA plugin
atlassian-ide-plugin.xml

# Cursive Clojure plugin
.idea/replstate.xml

# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties

# Editor-based Rest Client
.idea/httpRequests
.idea/
.vscode/
88 changes: 52 additions & 36 deletions fp/fp.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,23 @@
#!/usr/bin/env python3

import asyncio
import random
import re

import aiohttp
import lxml.html as lh
import requests

from fp.errors import FreeProxyException


class FreeProxy:
'''
"""
FreeProxy class scrapes proxies from <https://www.sslproxies.org/>,
<https://www.us-proxy.org/>, <https://free-proxy-list.net/uk-proxy.html>,
and <https://free-proxy-list.net> and checks if proxy is working.
There is possibility to filter proxies by country and acceptable timeout.
You can also randomize list of proxies from where script would get first
and <https://free-proxy-list.net> and checks if proxy is working.
There is possibility to filter proxies by country and acceptable timeout.
You can also randomize list of proxies from where script would get first
working proxy.
'''
"""

def __init__(self, country_id=None, timeout=0.5, rand=False, anonym=False, elite=False, google=None, https=False):
self.country_id = country_id
Expand All @@ -27,17 +28,21 @@ def __init__(self, country_id=None, timeout=0.5, rand=False, anonym=False, elite
self.google = google
self.schema = 'https' if https else 'http'

def get_proxy_list(self, repeat):
async def get_proxy_list(self, repeat):
try:
page = requests.get(self.__website(repeat))
doc = lh.fromstring(page.content)
except requests.exceptions.RequestException as e:
raise FreeProxyException(
f'Request to {self.__website(repeat)} failed') from e
async with aiohttp.ClientSession() as session:
async with session.get(self.__website(repeat)) as response:
response.raise_for_status()
content = await response.text()
doc = lh.fromstring(content)
except aiohttp.ClientError as e:
raise FreeProxyException(f'Request to {self.__website(repeat)} failed') from e

try:
tr_elements = doc.xpath('//*[@id="list"]//tr')
return [f'{tr_elements[i][0].text_content()}:{tr_elements[i][1].text_content()}'
for i in range(1, len(tr_elements)) if self.__criteria(tr_elements[i])]
proxies = [f'{tr_elements[i][0].text_content()}:{tr_elements[i][1].text_content()}'
for i in range(1, len(tr_elements)) if self.__criteria(tr_elements[i])]
return proxies
except Exception as e:
raise FreeProxyException('Failed to get list of proxies') from e

Expand All @@ -57,39 +62,50 @@ def __criteria(self, row_elements):
elite_criteria = True if not self.elite else 'elite' in row_elements[4].text_content(
)
anonym_criteria = True if (
not self.anonym) or self.elite else 'anonymous' == row_elements[4].text_content()
not self.anonym) or self.elite else 'anonymous' == row_elements[4].text_content()
switch = {'yes': True, 'no': False}
google_criteria = True if self.google is None else self.google == switch.get(
row_elements[5].text_content())
https_criteria = True if self.schema == 'http' else row_elements[6].text_content(
).lower() == 'yes'
return country_criteria and elite_criteria and anonym_criteria and google_criteria and https_criteria

def get(self, repeat=False):
'''Returns a working proxy that matches the specified parameters.'''
proxy_list = self.get_proxy_list(repeat)
async def get(self, repeat=False):
"""Returns a working proxy that matches the specified parameters."""
proxy_list = await self.get_proxy_list(repeat)
if self.random:
random.shuffle(proxy_list)
working_proxy = None
for proxy_address in proxy_list:
proxies = {self.schema: f'http://{proxy_address}'}
try:
working_proxy = self.__check_if_proxy_is_working(proxies)
if working_proxy:
return working_proxy
except requests.exceptions.RequestException:
continue
async with aiohttp.ClientSession() as session:
for proxy_address in proxy_list:
proxies = {self.schema: f'http://{proxy_address}'}
try:
working_proxy = await self.__check_if_proxy_is_working(session, proxies)
if working_proxy:
return working_proxy
except aiohttp.ClientError:
continue
if not working_proxy and not repeat:
if self.country_id is not None:
self.country_id = None
return self.get(repeat=True)
raise FreeProxyException(
'There are no working proxies at this time.')
return await self.get(repeat=True)
raise FreeProxyException('There are no working proxies at this time.')

def __check_if_proxy_is_working(self, proxies):
async def __check_if_proxy_is_working(self, session, proxies):
url = f'{self.schema}://www.google.com'
ip = proxies[self.schema].split(':')[1][2:]
with requests.get(url, proxies=proxies, timeout=self.timeout, stream=True) as r:
if r.raw.connection.sock and r.raw.connection.sock.getpeername()[0] == ip:
return proxies[self.schema]
return
try:
async with session.get(url, proxy=proxies[self.schema], timeout=self.timeout) as response:
if response.status == 200:
if response.connection:
pattern = r"URL\('(.+?)'\)"
match = re.search(pattern, str(response.connection))
if match:
return proxies[self.schema]
else:
pass
except asyncio.TimeoutError:
pass
except aiohttp.ClientError:
pass

return None
16 changes: 13 additions & 3 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
lxml==4.9.1
pip-chill==0.1.8
requests==2.21.0
aiohttp==3.8.4
aiosignal==1.3.1
async-timeout==4.0.2
attrs==23.1.0
certifi==2023.5.7
charset-normalizer==3.1.0
frozenlist==1.3.3
idna==3.4
lxml==4.9.2
multidict==6.0.4
soupsieve==2.4.1
urllib3==2.0.2
yarl==1.9.2