diff --git a/.gitignore b/.gitignore
index c55de92..cdce0cc 100644
--- a/.gitignore
+++ b/.gitignore
@@ -21,6 +21,7 @@ logs/
.idea/**/dynamic.xml
.idea/**/uiDesigner.xml
.idea/**/dbnavigator.xml
+*.info
# Gradle
.idea/**/gradle.xml
@@ -174,6 +175,7 @@ venv.bak/
.mypy_cache/
### VisualStudioCode ###
+.vscode/
.vscode/*
!.vscode/settings.json
!.vscode/tasks.json
diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..024875c
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,27 @@
+notifications:
+ email:
+ on_success: never
+ on_failure: always
+language: python
+python: 3.6
+addons:
+ apt:
+ update: true
+env:
+ global:
+ - GH_USER=Twiddly
+ - GH_MAIL=pew@pewpew.moe
+ - REPO=happypandax/plugins
+ - secure: sogo+NT/2/Xr1LNmksFbMSoGCNCgVUcQynhkBiLqSfLrfT9zDXVYl4FGqIntARJSNK1BTvayV/XD0HOtMzbjeARoR91+NYgPScV3PRDu/Bw+X4yM8jjN7GZjz5+a5+co+A8cfuKdrf93CNVV6N02fxzqdC9lhoyp+HD7JxbWKl2+8YmQetrAD4dGf+KaVcJKHr/pgpBND5Tp17jO4vAEbqD+GT7aXDoC+81Onq8UOzNI0/A9s+2IzMlG4Jhfdr52ynpND1plycspByVI3kRqrFPEcWk4x2U5C8OKU6Hf4Zuj3G1d37c8MP6F9F362m6MWa4cfxOeKCmP6uRSVEf9Oxb/w2OnEg40U5nYEpBvziml1STINSgQQlwI4+iO0o1G4Zl7nRsyICZEAhzJRjoPcF5+IfzjGera0xxFkVd+0A9fz8kAyTp53BK5uAxgiaLcIfVj9WsZhSuJPG4kx0abnaNLPJtrDDTdBX9+A0Xk9sXHBCphFVOhobTUspmpCN4zWO9jH8xcPotRyo4D53I7yapSiia89yDxQNLnoNGDVPuX4KTMWO9w2snczEKo9rSJUDJVqdRU1LXbERFCoyCd33Rfm9EZsR06mtkjkpAL7YWOfbjSlyes87OBS3sRW7FqmQPEgSVbovmygOa564Yfo9B48doSKtg09aj0IBJxxXk=
+
+
+script:
+  # Regenerate README.md from the plugin manifests.
+  - python3 build.py
+  - git config --local user.name "$GH_USER"
+  - git config --local user.email "$GH_MAIL"
+  # Commit only when build.py changed a tracked file: a plain `git commit`
+  # exits non-zero on a clean tree and would fail the build. "[skip ci]"
+  # keeps the pushed commit from triggering another build.
+  - git diff --quiet || git commit -am "build [skip ci]"
+  # The commit above lands on whatever HEAD the CI checked out (a detached
+  # HEAD on Travis), so push HEAD explicitly instead of the local master ref.
+  - git push --quiet "https://$GITHUB_TOKEN@github.com/$REPO.git" HEAD:master > /dev/null 2>&1
+
+branches:
+ only:
+ - master
\ No newline at end of file
diff --git a/Example Plugin/main.py b/Example Plugin/main.py
index 68a8b62..30ea790 100644
--- a/Example Plugin/main.py
+++ b/Example Plugin/main.py
@@ -5,12 +5,15 @@
@hpx.subscribe("init")
def inited():
+ "Called when this plugin is initialised"
pass
@hpx.subscribe("disable")
def disabled():
+ "Called when this plugin has been disabled"
pass
@hpx.subscribe("remove")
def removed():
+ "Called when this plugin is about to be removed"
pass
\ No newline at end of file
diff --git a/Example Plugin/tests.py b/Example Plugin/test.py
similarity index 100%
rename from Example Plugin/tests.py
rename to Example Plugin/test.py
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..0a04128
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,165 @@
+ GNU LESSER GENERAL PUBLIC LICENSE
+ Version 3, 29 June 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc.
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+
+ This version of the GNU Lesser General Public License incorporates
+the terms and conditions of version 3 of the GNU General Public
+License, supplemented by the additional permissions listed below.
+
+ 0. Additional Definitions.
+
+ As used herein, "this License" refers to version 3 of the GNU Lesser
+General Public License, and the "GNU GPL" refers to version 3 of the GNU
+General Public License.
+
+ "The Library" refers to a covered work governed by this License,
+other than an Application or a Combined Work as defined below.
+
+ An "Application" is any work that makes use of an interface provided
+by the Library, but which is not otherwise based on the Library.
+Defining a subclass of a class defined by the Library is deemed a mode
+of using an interface provided by the Library.
+
+ A "Combined Work" is a work produced by combining or linking an
+Application with the Library. The particular version of the Library
+with which the Combined Work was made is also called the "Linked
+Version".
+
+ The "Minimal Corresponding Source" for a Combined Work means the
+Corresponding Source for the Combined Work, excluding any source code
+for portions of the Combined Work that, considered in isolation, are
+based on the Application, and not on the Linked Version.
+
+ The "Corresponding Application Code" for a Combined Work means the
+object code and/or source code for the Application, including any data
+and utility programs needed for reproducing the Combined Work from the
+Application, but excluding the System Libraries of the Combined Work.
+
+ 1. Exception to Section 3 of the GNU GPL.
+
+ You may convey a covered work under sections 3 and 4 of this License
+without being bound by section 3 of the GNU GPL.
+
+ 2. Conveying Modified Versions.
+
+ If you modify a copy of the Library, and, in your modifications, a
+facility refers to a function or data to be supplied by an Application
+that uses the facility (other than as an argument passed when the
+facility is invoked), then you may convey a copy of the modified
+version:
+
+ a) under this License, provided that you make a good faith effort to
+ ensure that, in the event an Application does not supply the
+ function or data, the facility still operates, and performs
+ whatever part of its purpose remains meaningful, or
+
+ b) under the GNU GPL, with none of the additional permissions of
+ this License applicable to that copy.
+
+ 3. Object Code Incorporating Material from Library Header Files.
+
+ The object code form of an Application may incorporate material from
+a header file that is part of the Library. You may convey such object
+code under terms of your choice, provided that, if the incorporated
+material is not limited to numerical parameters, data structure
+layouts and accessors, or small macros, inline functions and templates
+(ten or fewer lines in length), you do both of the following:
+
+ a) Give prominent notice with each copy of the object code that the
+ Library is used in it and that the Library and its use are
+ covered by this License.
+
+ b) Accompany the object code with a copy of the GNU GPL and this license
+ document.
+
+ 4. Combined Works.
+
+ You may convey a Combined Work under terms of your choice that,
+taken together, effectively do not restrict modification of the
+portions of the Library contained in the Combined Work and reverse
+engineering for debugging such modifications, if you also do each of
+the following:
+
+ a) Give prominent notice with each copy of the Combined Work that
+ the Library is used in it and that the Library and its use are
+ covered by this License.
+
+ b) Accompany the Combined Work with a copy of the GNU GPL and this license
+ document.
+
+ c) For a Combined Work that displays copyright notices during
+ execution, include the copyright notice for the Library among
+ these notices, as well as a reference directing the user to the
+ copies of the GNU GPL and this license document.
+
+ d) Do one of the following:
+
+ 0) Convey the Minimal Corresponding Source under the terms of this
+ License, and the Corresponding Application Code in a form
+ suitable for, and under terms that permit, the user to
+ recombine or relink the Application with a modified version of
+ the Linked Version to produce a modified Combined Work, in the
+ manner specified by section 6 of the GNU GPL for conveying
+ Corresponding Source.
+
+ 1) Use a suitable shared library mechanism for linking with the
+ Library. A suitable mechanism is one that (a) uses at run time
+ a copy of the Library already present on the user's computer
+ system, and (b) will operate properly with a modified version
+ of the Library that is interface-compatible with the Linked
+ Version.
+
+ e) Provide Installation Information, but only if you would otherwise
+ be required to provide such information under section 6 of the
+ GNU GPL, and only to the extent that such information is
+ necessary to install and execute a modified version of the
+ Combined Work produced by recombining or relinking the
+ Application with a modified version of the Linked Version. (If
+ you use option 4d0, the Installation Information must accompany
+ the Minimal Corresponding Source and Corresponding Application
+ Code. If you use option 4d1, you must provide the Installation
+ Information in the manner specified by section 6 of the GNU GPL
+ for conveying Corresponding Source.)
+
+ 5. Combined Libraries.
+
+ You may place library facilities that are a work based on the
+Library side by side in a single library together with other library
+facilities that are not Applications and are not covered by this
+License, and convey such a combined library under terms of your
+choice, if you do both of the following:
+
+ a) Accompany the combined library with a copy of the same work based
+ on the Library, uncombined with any other library facilities,
+ conveyed under the terms of this License.
+
+ b) Give prominent notice with the combined library that part of it
+ is a work based on the Library, and explaining where to find the
+ accompanying uncombined form of the same work.
+
+ 6. Revised Versions of the GNU Lesser General Public License.
+
+ The Free Software Foundation may publish revised and/or new versions
+of the GNU Lesser General Public License from time to time. Such new
+versions will be similar in spirit to the present version, but may
+differ in detail to address new problems or concerns.
+
+ Each version is given a distinguishing version number. If the
+Library as you received it specifies that a certain numbered version
+of the GNU Lesser General Public License "or any later version"
+applies to it, you have the option of following the terms and
+conditions either of that published version or of any later version
+published by the Free Software Foundation. If the Library as you
+received it does not specify a version number of the GNU Lesser
+General Public License, you may choose any version of the GNU Lesser
+General Public License ever published by the Free Software Foundation.
+
+ If the Library as you received it specifies that a proxy can decide
+whether future versions of the GNU Lesser General Public License shall
+apply, that proxy's public statement of acceptance of any version is
+permanent authorization for you to choose that version for the
+Library.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..d3142f9
--- /dev/null
+++ b/README.md
@@ -0,0 +1,33 @@
+
+#### In this repository resides plugins for HappyPanda X. If you wish to write a plugin for HPX head over to [the docs](https://happypandax.github.io/plugin.html#plugins).
+
+### How to download
+
+I recommend these tools to download a single directory from this repo:
+- https://minhaskamal.github.io/DownGit/ -- *Paste the url to the plugin folder in this repo*
+- https://kinolien.github.io/gitzip/ -- *Paste the url to the plugin folder in this repo*
+- [Firefox Addon](https://addons.mozilla.org/en-US/firefox/addon/gitzip/)
+- [Chrome Extension](https://chrome.google.com/webstore/detail/gitzip-for-github/ffabmkklhbepgcgfonabamgnfafbdlkn)
+
+### How to install
+
+Please see [#Installing plugins](https://happypandax.github.io/usage.html#installing-plugins) in the documentation.
+
+# Be careful about plugins
+
+Read the relevant section [#Be careful about plugins](https://happypandax.github.io/usage.html#be-careful-about-plugins) in the documentation
+
+# Plugins
+
+Name | Version | Description
+--- | --- | ---
+[**Chaika Downloader**](https://github.com/happypandax/plugins/tree/master/plugins/Chaika%20Downloader) | `1.0.0` | *A plugin that enables downloading manga and doujinshi from panda.chaika.moe*
+[**Chaika Metadata**](https://github.com/happypandax/plugins/tree/master/plugins/Chaika%20Metadata) | `1.0.0` | *A plugin that can fetch metadata from Panda.Chaika*
+[**EHentai Downloader**](https://github.com/happypandax/plugins/tree/master/plugins/EHentai%20Downloader) | `1.0.0` | *A plugin that enables downloading manga and doujinshi from E-Hentai & ExHentai*
+[**EHentai Login**](https://github.com/happypandax/plugins/tree/master/plugins/EHentai%20Login) | `1.1.0` | *A plugin that can login to E-Hentai & ExHentai*
+[**EHentai Metadata**](https://github.com/happypandax/plugins/tree/master/plugins/EHentai%20Metadata) | `1.2.1` | *A plugin that can fetch metadata from E-Hentai & ExHentai*
+[**File Metadata**](https://github.com/happypandax/plugins/tree/master/plugins/File%20Metadata) | `2.0.2` | *Extracts and applies metadata from a file accompanying a gallery. Supports files produced from eze, e-hentai-downloader and hdoujin*
+[**NHentai Downloader**](https://github.com/happypandax/plugins/tree/master/plugins/NHentai%20Downloader) | `1.0.1` | *A plugin that enables downloading manga and doujinshi from nhentai.net*
+[**NHentai Metadata**](https://github.com/happypandax/plugins/tree/master/plugins/NHentai%20Metadata) | `1.0.1` | *A plugin that can fetch metadata from nhentai.net*
+
+
diff --git a/build.py b/build.py
new file mode 100644
index 0000000..b67c4bd
--- /dev/null
+++ b/build.py
@@ -0,0 +1,65 @@
+import json
+import glob
+import pathlib
+from urllib.parse import quote
+
+readme = """
+#### In this repository resides plugins for HappyPanda X. If you wish to write a plugin for HPX head over to [the docs](https://happypandax.github.io/plugin.html#plugins).
+
+### How to download
+
+I recommend these tools to download a single directory from this repo:
+- https://minhaskamal.github.io/DownGit/ -- *Paste the url to the plugin folder in this repo*
+- https://kinolien.github.io/gitzip/ -- *Paste the url to the plugin folder in this repo*
+- [Firefox Addon](https://addons.mozilla.org/en-US/firefox/addon/gitzip/)
+- [Chrome Extension](https://chrome.google.com/webstore/detail/gitzip-for-github/ffabmkklhbepgcgfonabamgnfafbdlkn)
+
+### How to install
+
+Please see [#Installing plugins](https://happypandax.github.io/usage.html#installing-plugins) in the documentation.
+
+# Be careful about plugins
+
+Read the relevant section [#Be careful about plugins](https://happypandax.github.io/usage.html#be-careful-about-plugins) in the documentation
+
+# Plugins
+
+{}
+
+"""
+
+# directory that is searched for plugin folders containing a hplugin.json
+plugins_dir = "plugins"
+# file the generated text is written to
+readme_file = "README.md"
+# descriptions longer than this many characters are truncated in the table
+desc_max_length = 200
+# used to build the GitHub link of each plugin folder
+repo_user = "happypandax"
+repo_name = "plugins"
+
+def main():
+    """Regenerate the readme file with a table of all plugins.
+
+    Every ``hplugin.json`` found one directory below ``plugins_dir`` adds a
+    row with the folder name (linked to GitHub), the version and the first
+    line of the description. Manifests without a description or version are
+    skipped and reported on stdout.
+    """
+    print("Building...")
+    rows = ["Name | Version | Description", "--- | --- | ---"]
+
+    for p in sorted(glob.glob(f"{plugins_dir}/**/hplugin.json")):
+        with open(p, 'r', encoding="utf-8") as f:
+            d = json.load(f)
+        plugin_desc = d.get("description")
+        plugin_ver = d.get("version")
+        if not (plugin_desc and plugin_ver):
+            print(f"Skipping {p}: missing description or version")
+            continue
+
+        plugin_dir = pathlib.Path(p).parent
+        dir_name = plugin_dir.name
+        # the URL needs forward slashes, also when built on Windows
+        plugin_dir = str(plugin_dir).replace('\\', '/')
+        gh_url = f"https://github.com/{repo_user}/{repo_name}/tree/master/{quote(plugin_dir)}"
+
+        # only the first line goes into the table, shortened if needed
+        plugin_desc = plugin_desc.split('\n')[0]
+        if len(plugin_desc) > desc_max_length:
+            plugin_desc = plugin_desc[:desc_max_length] + '…'
+        # an unescaped '|' would be read as a column separator
+        plugin_desc = plugin_desc.replace('|', '\\|')
+        rows.append(f"[**{dir_name}**]({gh_url}) | `{plugin_ver}` | *{plugin_desc}*")
+
+    txt = readme.format("\n".join(rows) + "\n")
+
+    with open(readme_file, 'w', encoding="utf-8") as f:
+        f.write(txt)
+    print("Done!")
+
+if __name__ == '__main__':
+ main()
\ No newline at end of file
diff --git a/plugins/Chaika Downloader/hplugin.json b/plugins/Chaika Downloader/hplugin.json
new file mode 100644
index 0000000..e78e4f9
--- /dev/null
+++ b/plugins/Chaika Downloader/hplugin.json
@@ -0,0 +1,15 @@
+{
+ "id": "dd86876d-4d4e-438e-bc2e-fbbb18e35742",
+ "shortname": "chaika-downloader",
+ "name": "Chaika Downloader",
+ "version": "1.0.0",
+ "description": "A plugin that enables downloading manga and doujinshi from panda.chaika.moe",
+ "author": "Twiddly",
+ "update_url": "https://github.com/happypandax/plugins/tree/master/plugins/Chaika%20Downloader",
+ "website": "https://github.com/happypandax/plugins/tree/master/plugins/Chaika%20Downloader",
+ "entry": "main.py",
+ "test": "test.py",
+ "require": [
+ "happypandax >= 0.12.0"
+ ]
+}
\ No newline at end of file
diff --git a/plugins/Chaika Downloader/main.py b/plugins/Chaika Downloader/main.py
new file mode 100644
index 0000000..59ccb01
--- /dev/null
+++ b/plugins/Chaika Downloader/main.py
@@ -0,0 +1,149 @@
+# main.py
+import __hpx__ as hpx
+import regex
+
+from bs4 import BeautifulSoup
+
+DownloadRequest = hpx.command.DownloadRequest
+
+log = hpx.get_logger("main")
+
+# trigger name shared by the Download.query and Download.done handlers below
+IDENTIFIER = "chaika"
+# headers passed along with the requests made by this plugin
+HEADERS = {'user-agent':"Mozilla/5.0 (Windows NT 6.3; rv:36.0) Gecko/20100101 Firefox/36.0"}
+# delay registered for chaika requests when the user has not set one
+DEFAULT_DELAY = 0.5
+
+# 'ch' is the site root, 'gallery_api' expects a gallery id appended to it
+URLS = {
+ 'ch': 'https://panda.chaika.moe',
+ 'gallery_api': 'https://panda.chaika.moe/jsearch?gallery=',
+}
+
+def website_url_regex_gen(domain, path_regex=None, variable_port=False, variable_tld=False, trailing_slash=True, end=True, trailing_fragment=True):
+    """
+    Builds the regex string used to recognise urls of a domain.
+
+    `domain` is placed into the pattern as given. The optional parts follow it
+    in this order: tld, port, slash, `path_regex`, slash, fragment, end anchor.
+    """
+    # the optional slash is emitted both before and after the path
+    optional_slash = r"\/?" if trailing_slash else ""
+    pieces = [
+        r"^(http\:\/\/|https\:\/\/)?(www\.)?({})".format(domain),
+        r"\.[a-z]{2,5}" if variable_tld else "",
+        r"(:[0-9]{1,5})?" if variable_port else "",
+        optional_slash,
+        path_regex if path_regex else "",
+        optional_slash,
+        r"(#\S+)?" if trailing_fragment else "",
+        "$" if end else "",
+    ]
+    return "".join(pieces)
+
+@hpx.subscribe("init")
+def inited():
+ "Called on the init event, registers a request delay for chaika when none is set"
+ # set default delay if not set
+ delays = hpx.get_setting("network", "delays", {})
+ delay_url = URLS['ch']
+ if delay_url not in delays:
+ log.info(f"Setting delay on {delay_url} requests to {DEFAULT_DELAY}")
+ delays[delay_url] = DEFAULT_DELAY
+ hpx.update_setting("network", "delays", delays)
+
+@hpx.attach("Download.info")
+def download_info():
+ "Returns the DownloadInfo describing this handler: identifier, name, url regex, sites and description"
+ return hpx.command.DownloadInfo(
+ identifier = IDENTIFIER,
+ name = "Chaika",
+ # matches gallery and archive urls followed by an id of 3 to 15 digits
+ parser = website_url_regex_gen("panda.chaika.moe", path_regex=r"(gallery|archive)\/[0-9]{3,15}", trailing_slash=True, variable_tld=False, trailing_fragment=True, end=True),
+ sites = ("https://panda.chaika.moe",),
+ description = "Download manga and doujinshi from panda.chaika.moe",
+ )
+
+@hpx.attach("Download.query", trigger=IDENTIFIER)
+def download_query(item):
+    """
+    Called to query for resource URLs that should be downloaded.
+    Note that HPX will handle the actual downloading part.
+    The attached handler should just return all the URLs that should be downloaded in the form of :class:`DownloadRequest` objects
+
+    should return:
+        a tuple of :class:`DownloadRequest` for all the URL resources that should be downloaded.
+        Note that the download system is recursive, so if the URL resource matches a download handler (the same or a different one),
+        That handler will be called upon with a new :class:`DownloadItem` for that particular URL
+        (though only once, meaning, no handler will be called upon again with the exact same URL during a single session)
+        The tuple is empty when no id could be read from the url or no archive was found.
+    """
+
+    log.info(f"querying url: {item.url}")
+
+    # chaika has a simple url system where every download url is in the form of https://panda.chaika.moe/archive/32870/download/
+    # if the url is a gallery url, find and retrieve the archive urls
+
+    url_type, gid = parse_url(item.url)
+    if gid is None:
+        # without an id there is nothing to look up or to download
+        log.warning(f"no gallery or archive id found in url: {item.url}")
+        return tuple()
+
+    download_urls = []
+
+    if url_type == 'gallery':
+        log.info("url was a gallery url, retrieving archive urls")
+        # prepare request
+        req_props = hpx.command.RequestProperties(
+            headers=HEADERS,
+        )
+        req = hpx.command.SingleGETRequest().request(URLS['gallery_api']+str(gid), req_props)
+        if req.ok:
+            log.info("request was successful")
+            # an empty body is treated like a gallery without archives
+            gallery_data = req.json or {}
+
+            # get all archive urls
+            a_urls = gallery_data.get("archives")
+            if a_urls:
+                # we also get to set the name of this download item
+                title = gallery_data.get('title')
+                if title:
+                    item.name = title
+
+                for a in a_urls:
+                    # skip archive entries that carry no download path
+                    a_path = a.get('download')
+                    if a_path:
+                        download_urls.append(URLS['ch']+a_path)
+        else:
+            log.warning(f"request for gallery {gid} failed")
+    else:
+        download_urls.append(URLS['ch']+f"/archive/{gid}/download/")
+
+    download_requests = [DownloadRequest(downloaditem=item, url=durl) for durl in download_urls]
+
+    if download_requests:
+        log.debug(f"found {len(download_urls)} download urls: {download_urls}")
+        log.info(f"was able to prepare requests for {len(download_requests)} urls")
+    return tuple(download_requests)
+
+@hpx.attach("Download.done", trigger=IDENTIFIER)
+def download_done(result):
+ """
+ Called when downloading of all :class:`DownloadRequest` for a specific :class:`DownloadItem` has finished.
+ The handler should do any post-processing here (archive files, rename files or folders, delete extraneous files, etc.).
+ Remember to set the `status` property on the :class:`DownloadResult` object to `False` if the post-processing was a failure.
+ Note that the handler should *not* import the file into HPX (if it's an item), that part will be taken care of by HPX
+
+ should return:
+ the same :class:`DownloadResult` that was provided to the handler, potentially modified on the `path` or `status` and `reason` properties
+ """
+ # there's nothing special to post-process in the case of chaika downloader, so just return the result as is
+ log.info(f"download of archive was successful for {result.downloaditem.name}")
+ return result
+
+def parse_url(url):
+    """
+    Parses url into a tuple of (type, id).
+
+    type is 'archive' when the url contains 'archive', otherwise 'gallery'.
+    id is the first run of digits in the url as an int, or None (after a
+    warning has been logged) when the url contains no digits.
+    """
+    stype = 'archive' if 'archive' in url else 'gallery'
+
+    found = regex.search('([0-9]+)', url)
+    if not found:
+        # report the failure through the return value instead of raising from int(None)
+        log.warning("Error extracting id from url: {}".format(url))
+        return stype, None
+
+    return stype, int(found.group())
diff --git a/plugins/Chaika Downloader/readme.md b/plugins/Chaika Downloader/readme.md
new file mode 100644
index 0000000..593469b
--- /dev/null
+++ b/plugins/Chaika Downloader/readme.md
@@ -0,0 +1,13 @@
+Chaika Downloader
+----------------------------
+
+> A plugin that enables downloading manga and doujinshi from panda.chaika.moe
+
+## Configuration
+
+There are no config options available for this plugin
+
+# Changelog
+
+- `1.0.0`
+ - first version
\ No newline at end of file
diff --git a/plugins/Chaika Downloader/test.py b/plugins/Chaika Downloader/test.py
new file mode 100644
index 0000000..36a317e
--- /dev/null
+++ b/plugins/Chaika Downloader/test.py
@@ -0,0 +1 @@
+# test.py
\ No newline at end of file
diff --git a/plugins/Chaika Metadata/hplugin.json b/plugins/Chaika Metadata/hplugin.json
new file mode 100644
index 0000000..1b6ab70
--- /dev/null
+++ b/plugins/Chaika Metadata/hplugin.json
@@ -0,0 +1,15 @@
+{
+ "id": "55747a41-789b-43dd-964a-2a6cb1761ff4",
+ "shortname": "chaika-metadata",
+ "name": "Chaika Metadata",
+ "version": "1.0.0",
+ "description": "A plugin that can fetch metadata from Panda.Chaika",
+ "author": "Twiddly",
+ "update_url": "https://github.com/happypandax/plugins/tree/master/plugins/Chaika%20Metadata",
+ "website": "https://github.com/happypandax/plugins/tree/master/plugins/Chaika%20Metadata",
+ "entry": "main.py",
+ "test": "test.py",
+ "require": [
+ "happypandax >= 0.10.0"
+ ]
+}
\ No newline at end of file
diff --git a/plugins/Chaika Metadata/main.py b/plugins/Chaika Metadata/main.py
new file mode 100644
index 0000000..cbb4de3
--- /dev/null
+++ b/plugins/Chaika Metadata/main.py
@@ -0,0 +1,473 @@
+# main.py
+import __hpx__ as hpx
+import regex
+import arrow
+import datetime
+import os
+import urllib
+import html
+
+from bs4 import BeautifulSoup
+from PIL import Image, ImageChops
+
+log = hpx.get_logger("main")
+
+MATCH_URL_PREFIX = r"^(http\:\/\/|https\:\/\/)?(www\.)?" # http:// or https:// + www.
+MATCH_URL_END = r"\/?$" # optional trailing slash, then end of string
+
+# delay registered for chaika requests when the user has not set one
+DEFAULT_DELAY = 1.5
+
+# 'gallery' matches archive and gallery urls ending in a numeric id
+URLS_REGEX = {
+ 'gallery': MATCH_URL_PREFIX + r"(panda\.chaika\.moe\/(archive|gallery)\/[0-9]+)" + MATCH_URL_END,
+}
+
+# the *_api entries expect an id (or hash) appended, 'title_search' is a template with a {title} field
+URLS = {
+ 'ch': 'https://panda.chaika.moe',
+ 'gallery': 'https://panda.chaika.moe/gallery/',
+ 'archive': 'https://panda.chaika.moe/archive/',
+ 'gallery_api': 'https://panda.chaika.moe/jsearch?gallery=',
+ 'archive_api': 'https://panda.chaika.moe/jsearch?archive=',
+ 'hash_api': 'https://panda.chaika.moe/jsearch?sha1=',
+ 'title_search': "https://panda.chaika.moe/galleries/?title={title}&tags=&category=&provider=&uploader=&rating_from=&rating_to=&filesize_from=&filesize_to=&filecount_from=&filecount_to=&sort=posted&asc_desc=desc&apply="
+}
+
+# headers passed along with the requests made by this plugin
+HEADERS = {'user-agent':"Mozilla/5.0 (Windows NT 6.3; rv:36.0) Gecko/20100101 Firefox/36.0"}
+
+# default options, overridden by the user's plugin config on init and on config updates
+PLUGIN_CONFIG = {
+ 'filename_search': False, # use the filename/folder-name for searching instead of gallery title
+ 'remove_namespaces': True, # remove superfluous namespaces like 'artist', 'language' and 'group' because they are handled specially in HPX
+ 'gallery_results_limit': 10, # maximum amount of galleries to return
+ 'blacklist_tags': [], # tags to ignore when updating tags
+ 'add_gallery_url': True, # add chaika url to gallery
+ 'preferred_language': "english", # preferred gallery language (in gallery title) to extract from if multiple galleries were found, set empty string for default
+}
+
+@hpx.subscribe("init")
+def inited():
+ "Called on the init event, loads the plugin config and registers a request delay for chaika when none is set"
+ PLUGIN_CONFIG.update(hpx.get_plugin_config())
+
+ # set default delay values if not set
+ delays = hpx.get_setting("network", "delays", {})
+ for u in (URLS['ch'],):
+ if u not in delays:
+ log.info(f"Setting delay on {u} requests to {DEFAULT_DELAY}")
+ delays[u] = DEFAULT_DELAY
+ hpx.update_setting("network", "delays", delays)
+
+@hpx.subscribe('config_update')
+def config_update(cfg):
+ "Called on the config_update event, merges the given config into PLUGIN_CONFIG"
+ PLUGIN_CONFIG.update(cfg)
+
+@hpx.subscribe("disable")
+def disabled():
+ "Handler for the disable event, this plugin has nothing to do here"
+ pass
+
+@hpx.subscribe("remove")
+def removed():
+ "Handler for the remove event, this plugin has nothing to do here"
+ pass
+
+@hpx.attach("Metadata.info")
+def metadata_info():
+ "Returns the MetadataInfo describing this handler: identifier, name, url regex, sites, description and supported models"
+ return hpx.command.MetadataInfo(
+ identifier = "chaika",
+ name = "Panda.Chaika",
+ parser = URLS_REGEX['gallery'],
+ sites = ("https://panda.chaika.moe",),
+ description = "Fetch metadata from Panda.Chaika",
+ models = (
+ hpx.command.GetDatabaseModel("Gallery"),
+ )
+ )
+
+@hpx.attach("Metadata.query", trigger="chaika")
+def query(itemtuple):
+ """
+ Called to query for candidates to extract metadata from.
+ Note that HPX will handle choosing which candidates to extract data from.
+ The attached handler should just return all the candidates found.
+
+ For every item the provided url is used when there is one, otherwise chaika is
+ searched by title (or by file/folder name when `filename_search` is enabled).
+
+ should return:
+ a tuple of :class:`MetadataData`, one for every candidate url an id could be parsed from
+ """
+ log.info("Querying chaika for metadata")
+ mdata = []
+ for mitem in itemtuple:
+ item = mitem.item
+ url = mitem.url
+ gurls = [] # list of (title, url) tuples
+ # url was provided
+ if url:
+ log.info(f"url provided: {url} for {item}")
+ # no title is known here, so the url doubles as the title
+ gurls.append((url, url))
+ else: # manually search for id
+ log.info(f"url not provided for {item}")
+ # search with title
+ i_title = ""
+ i_hash = "" # not used yet, see the hash search placeholder below
+ if PLUGIN_CONFIG.get("filename_search"):
+ sources = item.get_sources()
+ if sources:
+ # get folder/file name
+ i_title = os.path.split(sources[0])[1]
+ # remove ext
+ i_title = os.path.splitext(i_title)[0]
+ else:
+ if item.titles:
+ i_title = item.titles[0].name # make user choice
+ if i_title:
+ gurls = title_search(i_title)
+
+ # search with hash
+ # placeholder: searching by hash is not implemented
+ if not gurls:
+ pass
+
+ log.info(f"found {len(gurls)} urls for item: {item}")
+
+ # list is sorted by date added so we reverse it
+ gurls.reverse()
+
+ log.debug(f"{gurls}")
+ final_gurls = []
+ pref_lang = PLUGIN_CONFIG.get('preferred_language')
+ if pref_lang:
+ # candidates whose title mentions the preferred language are moved to the front
+ for t in gurls:
+ if pref_lang.lower() in t[0].lower():
+ final_gurls.insert(0, t)
+ continue
+ final_gurls.append(t)
+ else:
+ final_gurls = gurls
+
+ for t, u in final_gurls:
+ g_type, g_id = parse_url(u)
+ # candidates without a usable id are dropped
+ if g_type and g_id:
+ mdata.append(hpx.command.MetadataData(
+ metadataitem = mitem,
+ title=t,
+ url=u,
+ data={
+ 'type': g_type,
+ 'id': g_id,
+ 'gallery_url': u,
+ }))
+ return tuple(mdata)
+
+@hpx.attach("Metadata.apply", trigger="chaika")
+def apply(datatuple):
+    """
+    Called to fetch and apply metadata to the given data items.
+    Remember to set the `status` property on the :class:`MetadataResult` object to `True` on a successful fetch.
+
+    should return:
+        a tuple with one :class:`MetadataResult` per given data item; `reason` carries the
+        'result' value of the response when chaika answered with one
+    """
+    log.info("Applying metadata from chaika")
+    mresult = []
+
+    for mdata in datatuple:
+        applied = False
+        # set up front so a failed request or an empty body cannot leave it unbound
+        reason = ""
+        # prepare request
+        req_props = hpx.command.RequestProperties(
+            headers=HEADERS,
+        )
+
+        api_url = URLS['archive_api'] if mdata.data['type'] == 'archive' else URLS['gallery_api']
+        api_url += str(mdata.data['id'])
+
+        r = hpx.command.SingleGETRequest().request(api_url, req_props)
+        if r.ok:
+            response = r.json
+            if response and 'result' not in response:
+                filtered_data = format_metadata(response, mdata.item, apply_url=PLUGIN_CONFIG.get('add_gallery_url', True), gallery_url=mdata.data['gallery_url'])
+                applied = apply_metadata(filtered_data, mdata.item, mdata.options)
+            elif response:
+                # a response carrying a 'result' key is treated as a failure, its value is reported as the reason
+                log.warning(response)
+                reason = response['result']
+        else:
+            log.warning(f"request failed for url: {api_url}")
+        mresult.append(hpx.command.MetadataResult(data=mdata, status=applied, reason=reason))
+        log.info(f"Applied: {applied}")
+    return tuple(mresult)
+
+def title_search(title, session=None, _times=0):
+    """
+    Looks up galleries on chaika by title and returns a list of (title, url).
+
+    When the first lookup finds nothing, it is repeated once with everything
+    inside () and [] removed from the title.
+    """
+    search_url = URLS['title_search']
+    log.debug(f"searching with title: {title}")
+    query_url = search_url.format(title=urllib.parse.quote_plus(title))
+    log.debug(f"final url: {query_url}")
+
+    found = page_results(query_url, session=session)
+    if found or _times:
+        # either there are results or this already was the retry
+        return found
+
+    # strip bracketed parts and collapse the whitespace left behind
+    stripped = regex.sub(r"\(.+?\)|\[.+?\]", "", title)
+    stripped = " ".join(stripped.split())
+    return title_search(stripped, session, _times=_times+1)
+
+def page_results(page_url, limit=None, session=None):
+    """
+    Opens chaika page, parses for results, and then returns list of (title, url)
+
+    `limit` caps the number of result rows read and defaults to the
+    `gallery_results_limit` option. `session`, when given, is set on the request
+    properties. An empty list is returned when the request fails or nothing matches.
+    """
+    if limit is None:
+        limit = PLUGIN_CONFIG.get("gallery_results_limit")
+
+    # prepare request
+    req_props = hpx.command.RequestProperties(
+        headers=HEADERS,
+    )
+    if session:
+        req_props.session = session
+        log.debug(f"COOKIES: {session.cookies}")
+    r = hpx.command.SingleGETRequest().request(page_url, req_props)
+    if not r.ok:
+        # don't try to parse the body of a failed request
+        log.warning(f"Request failed for url: {page_url}")
+        return []
+
+    found_urls = []
+    soup = BeautifulSoup(r.text, "html.parser")
+    for row in soup.findAll("tr", class_="result-list", limit=limit):
+        cells = row.findAll('td')
+        # the gallery link is read from the second cell; rows laid out differently are skipped instead of raising
+        link = cells[1].a if len(cells) > 1 else None
+        if link is None or not link.get('href'):
+            continue
+        found_urls.append((str(link.string), URLS['ch'] + link['href'])) # title, url
+
+    if not found_urls:
+        log.warning(f"No results found on url: {page_url}")
+        log.debug(f"HTML: {r.text}")
+    return found_urls
+
+def parse_url(url):
+    """
+    Parses url into a tuple of (type, id).
+
+    type is 'archive' when the url contains 'archive', otherwise 'gallery'.
+    id is the first run of digits in the url as an int, or None (after a
+    warning has been logged) when the url contains no digits.
+    """
+    stype = 'archive' if 'archive' in url else 'gallery'
+
+    found = regex.search('([0-9]+)', url)
+    if not found:
+        # report the failure through the return value instead of raising from int(None)
+        log.warning("Error extracting id from url: {}".format(url))
+        return stype, None
+
+    return stype, int(found.group())
+
+def capitalize_text(text):
+    """
+    Capitalizes every whitespace-separated word and joins the words with single spaces
+    """
+    words = text.split()
+    return " ".join(map(str.capitalize, words))
+
+def _preferred_spelling(name, parsed_names):
+    "Returns the entry of parsed_names that equals name ignoring case, or name itself when there is none"
+    # parsed_names is assumed to be an iterable of strings; a lone string is treated as one name -- TODO confirm
+    if isinstance(parsed_names, str):
+        parsed_names = (parsed_names,)
+    lowered = name.lower()
+    for candidate in parsed_names or ():
+        if candidate.lower() == lowered:
+            return candidate
+    return name
+
+def format_metadata(gdata, item, apply_url=False, gallery_url=None):
+    """
+    Formats metadata to look like this for apply_metadata:
+        data = {
+            'titles': None, # [(title, language),...]
+            'artists': None, # [(artist, (circle, circle, ..)),...]
+            'parodies': None, # [parody, ...]
+            'category': None,
+            'tags': None, # [tag, tag, tag, ..] or {ns:[tag, tag, tag, ...]}
+            'pub_date': None, # DateTime object or Arrow object
+            'language': None,
+            'urls': None # [url, ...]
+            }
+
+    `gdata` is the response of the chaika api. `item` is not used here.
+    With `apply_url` set, the url of the gallery named in `gdata` is added,
+    falling back to `gallery_url`.
+    """
+    mdata = {}
+
+    mdata['titles'] = []
+
+    parsed_text = hpx.command.ItemTextParser(gdata['title'])
+
+    parsed_title = parsed_text.extract_title()
+    if parsed_title:
+        parsed_title = parsed_title[0]
+    mdata['titles'].append((parsed_title or gdata['title'], 'english'))
+
+    mdata['titles'].append((gdata['title_jpn'], 'japanese'))
+
+    mdata['category'] = gdata['category']
+    if gdata['posted']:
+        mdata['pub_date'] = arrow.Arrow.fromtimestamp(gdata['posted'])
+
+    lang = "japanese" # default language
+
+    artists = set()
+    circles = set()
+    parodies = set()
+
+    parsed_artists = parsed_text.extract_artist()
+    parsed_circles = parsed_text.extract_circle()
+
+    extraneous_namespaces = ("artist", "parody", "group", "language")
+    blacklist_tags = PLUGIN_CONFIG.get("blacklist_tags")
+    mdata['tags'] = {}
+
+    for onstag in gdata['tags']:
+        nstag = onstag.replace('_', ' ')
+        # a tag can be blacklisted in either its original or its spaced form
+        if blacklist_tags and (nstag in blacklist_tags or onstag in blacklist_tags):
+            continue
+
+        ns = None
+        if ':' in nstag:
+            ns, t = nstag.split(':', 1)
+        else:
+            t = nstag
+
+        if ns == 'language' and t != 'translated':
+            lang = t
+        elif ns == "artist":
+            # the artist extracted from the title likely has better capitalization, so choose that instead
+            artists.add(_preferred_spelling(t, parsed_artists))
+        elif ns == "group":
+            # the circle extracted from the title likely has better capitalization, so choose that instead
+            circles.add(_preferred_spelling(t, parsed_circles))
+        elif ns == "parody":
+            parodies.add(t)
+
+        if not (PLUGIN_CONFIG.get("remove_namespaces") and ns in extraneous_namespaces):
+            mdata['tags'].setdefault(ns, []).append(t)
+        else:
+            log.debug(f"removing namespace {ns}")
+
+    log.debug(f"tags: {mdata['tags']}")
+
+    mdata['language'] = lang
+
+    if parodies:
+        mdata['parodies'] = parodies
+
+    if artists:
+        # assign circles to each artist
+        mdata['artists'] = [(a, tuple(circles)) for a in artists]
+
+    if apply_url:
+        if gdata.get('gallery', False):
+            mdata['urls'] = [URLS['gallery']+f"{gdata['gallery']}/"]
+        elif gallery_url:
+            mdata['urls'] = [gallery_url]
+
+    return mdata
+
+# Short aliases for the hpx.command data classes used by apply_metadata below
+GalleryData = hpx.command.GalleryData
+LanguageData = hpx.command.LanguageData
+TitleData = hpx.command.TitleData
+ArtistData = hpx.command.ArtistData
+ArtistNameData = hpx.command.ArtistNameData
+ParodyData = hpx.command.ParodyData
+ParodyNameData = hpx.command.ParodyNameData
+CircleData = hpx.command.CircleData
+CategoryData = hpx.command.CategoryData
+UrlData = hpx.command.UrlData
+NamespaceTagData= hpx.command.NamespaceTagData
+TagData= hpx.command.TagData
+NamespaceData = hpx.command.NamespaceData
+
+def apply_metadata(data, gallery, options):
+ """
+ data = {
+ 'titles': None, # [(title, language),...]
+ 'artists': None, # [(artist, (circle, circle, ..)),...]
+ 'parodies': None, # [parody, ...]
+ 'category': None,
+ 'tags': None, # [tag, tag, tag, ..] or {ns:[tag, tag, tag, ...]}
+ 'pub_date': None, # DateTime object or Arrow object
+ 'language': None,
+ 'urls': None # [url, ...]
+ }
+ """
+
+ log.debug(f"data: {data}")
+
+ gdata = GalleryData()
+
+ if isinstance(data.get('titles'), (list, tuple, set)):
+ gtitles = []
+ for t, l in data['titles']:
+ gtitle = None
+ if t:
+ t = html.unescape(t)
+ gtitle = TitleData(name=t)
+ if t and l:
+ gtitle.language = LanguageData(name=l)
+ if gtitle:
+ gtitles.append(gtitle)
+
+ if gtitles:
+ gdata.titles = gtitles
+ log.debug("applied titles")
+
+ if isinstance(data.get('artists'), (list, tuple, set)):
+ gartists = []
+ for a, c in data['artists']:
+ if a:
+ gartist = ArtistData(names=[ArtistNameData(name=capitalize_text(a))])
+ gartists.append(gartist)
+
+ if c:
+ gcircles = []
+ for circlename in [x for x in c if x]:
+ gcircles.append(CircleData(name=capitalize_text(circlename)))
+ gartist.circles = gcircles
+
+ if gartists:
+ gdata.artists = gartists
+ log.debug("applied artists")
+
+ if isinstance(data.get('parodies'), (list, tuple, set)):
+ gparodies = []
+ for p in data['parodies']:
+ if p:
+ gparody = ParodyData(names=[ParodyNameData(name=capitalize_text(p))])
+ gparodies.append(gparody)
+
+ if gparodies:
+ gdata.parodies = gparodies
+ log.debug("applied parodies")
+
+ if data.get('category'):
+ gdata.category = CategoryData(name=data['category'])
+ log.debug("applied category")
+
+ if data.get('language'):
+ gdata.language = LanguageData(name=data['language'])
+ log.debug("applied language")
+
+ if isinstance(data.get('tags'), (dict, list)):
+ if isinstance(data['tags'], list):
+ data['tags'] = {None: data['tags']}
+ gnstags = []
+ for ns, tags in data['tags'].items():
+ if ns is not None:
+ ns = ns.strip()
+ if ns and ns.lower() == 'misc':
+ ns = None
+ for t in tags:
+ t = t.strip()
+ if t:
+ kw = {'tag': TagData(name=t)}
+ if ns:
+ kw['namespace'] = NamespaceData(name=ns)
+ gnstags.append(NamespaceTagData(**kw))
+
+ if gnstags:
+ gdata.tags = gnstags
+ log.debug("applied tags")
+
+ if isinstance(data.get('pub_date'), (datetime.datetime, arrow.Arrow)):
+ pub_date = data['pub_date']
+ gdata.pub_date = pub_date
+ log.debug("applied pub_date")
+
+ if isinstance(data.get('urls'), (list, tuple)):
+ gurls = []
+ for u in data['urls']:
+ if u:
+ gurls.append(UrlData(name=u))
+ if gurls:
+ gdata.urls = gurls
+ log.debug("applied urls")
+
+ applied = hpx.command.UpdateItemData(gallery, gdata, options=options)
+
+ log.debug(f"applied: {applied}")
+
+ return applied
\ No newline at end of file
diff --git a/plugins/Chaika Metadata/readme.md b/plugins/Chaika Metadata/readme.md
new file mode 100644
index 0000000..28461ee
--- /dev/null
+++ b/plugins/Chaika Metadata/readme.md
@@ -0,0 +1,41 @@
+Chaika Metadata
+----------------------------
+
+> This plugin fetches metadata from Panda.Chaika
+
+## Configuration
+
+Configure this plugin by adding `chaika-metadata` to the `plugin.config` namespace in your `config.yaml`:
+```yaml
+plugin:
+ config:
+ chaika-metadata:
+ option1: True
+ option2:
+ - item 1
+ - item 2
+```
+
+#### Available options
+
+Name | Default | Description
+--- | --- | ---
+`filename_search` | `false` | use the filename/folder-name for searching instead of gallery title
+`remove_namespaces` | `true` | remove superfluous namespaces like 'artist', 'language' and 'group' because they are handled specially in HPX
+`gallery_results_limit` | `10` | maximum amount of galleries to return
+`blacklist_tags` | `[]` | tags to ignore when updating tags, a list of `namespace:tag` or `tag` strings
+`add_gallery_url` | `true` | add chaika url to gallery
+`preferred_language` | `english` | preferred gallery language (in gallery title) to extract from if multiple galleries were found, set empty string for default
+
+
+## Things yet to be implemented
+
+- File similarity search
+
+# Changelog
+
+- `1.0.0`
+ - Updated to reflect new changes in HPX v0.10.0
+
+- `0.1.0b`
+ - first version
\ No newline at end of file
diff --git a/plugins/Chaika Metadata/test.py b/plugins/Chaika Metadata/test.py
new file mode 100644
index 0000000..36a317e
--- /dev/null
+++ b/plugins/Chaika Metadata/test.py
@@ -0,0 +1 @@
+# test.py
\ No newline at end of file
diff --git a/plugins/EHentai Downloader/hplugin.json b/plugins/EHentai Downloader/hplugin.json
new file mode 100644
index 0000000..21eb484
--- /dev/null
+++ b/plugins/EHentai Downloader/hplugin.json
@@ -0,0 +1,15 @@
+{
+ "id": "efaec768-760c-49d7-8d45-fb70b7db45e5",
+ "shortname": "ehentai-downloader",
+ "name": "EHentai Downloader",
+ "version": "1.0.0",
+ "description": "A plugin that enables downloading manga and doujinshi from E-Hentai & ExHentai",
+ "author": "Twiddly",
+ "update_url": "https://github.com/happypandax/plugins/tree/master/plugins/EHentai%20Downloader",
+ "website": "https://github.com/happypandax/plugins/tree/master/plugins/EHentai%20Downloader",
+ "entry": "main.py",
+ "test": "test.py",
+ "require": [
+ "happypandax >= 0.12.0"
+ ]
+}
\ No newline at end of file
diff --git a/plugins/EHentai Downloader/main.py b/plugins/EHentai Downloader/main.py
new file mode 100644
index 0000000..bfaaa06
--- /dev/null
+++ b/plugins/EHentai Downloader/main.py
@@ -0,0 +1,251 @@
+# main.py
+import __hpx__ as hpx
+import regex
+import json
+from bs4 import BeautifulSoup
+
+# hpx request type used to describe each URL this plugin wants downloaded
+DownloadRequest = hpx.command.DownloadRequest
+
+log = hpx.get_logger("main")
+
+# identifiers of the two download handlers; also used as the triggers for the query/done hooks
+EH_IDENTIFIER = "ehentai"
+EX_IDENTIFIER = "exhentai"
+
+# headers sent with every request made by this plugin
+HEADERS = {'user-agent':"Mozilla/5.0 (Windows NT 6.3; rv:36.0) Gecko/20100101 Firefox/36.0"}
+# value written to the "network"/"delays" setting for urls that have none (presumably seconds - confirm)
+DEFAULT_DELAY = 5
+
+# site roots, API endpoints and archiver url templates (the templates are filled in with str.format)
+URLS = {
+ 'eh': 'https://e-hentai.org',
+ 'ex': 'https://exhentai.org',
+ 'e_api': 'https://api.e-hentai.org/api.php',
+ 'ex_api': 'https://exhentai.org/api.php',
+ 'e_archiver': 'https://e-hentai.org/archiver.php?gid={gallery_id}&token={gallery_token}&or={archiver_key}',
+ 'ex_archiver': 'https://exhentai.org/archiver.php?gid={gallery_id}&token={gallery_token}&or={archiver_key}',
+}
+
+
+def website_url_regex_gen(domain, path_regex=None, variable_port=False, variable_tld=False, trailing_slash=True, end=True, trailing_fragment=True):
+ """
+ Generates a regex suitable for a specific domain
+
+ The pattern always starts with an optional http(s):// scheme and an optional "www." prefix,
+ followed by domain in a capture group.
+ NOTE(review): domain is inserted as-is (not escaped), so a "." in it matches any character.
+
+ domain: domain text or pattern placed inside the capture group
+ path_regex: pattern appended after the domain part, if given
+ variable_port: also accept an optional ":<1-5 digits>" port
+ variable_tld: append a "\\.[a-z]{2,5}" TLD pattern after domain
+ trailing_slash: accept an optional "/" around the path part
+ end: anchor the pattern with "$"
+ trailing_fragment: accept an optional "#fragment" at the end
+
+ returns the regex as a string
+ """
+ rgx = r"^(http\:\/\/|https\:\/\/)?(www\.)?({})".format(domain)
+ if variable_tld:
+ rgx += r"\.[a-z]{2,5}"
+ if variable_port:
+ rgx += r"(:[0-9]{1,5})?"
+ if trailing_slash:
+ rgx += r"\/?"
+ if path_regex:
+ rgx += path_regex
+ if trailing_slash:
+ rgx += r"\/?"
+ if trailing_fragment:
+ rgx += r"(#\S+)?"
+ if end:
+ rgx += "$"
+ return rgx
+
+@hpx.subscribe("init")
+def inited():
+ """
+ Makes sure a request delay is configured for the E-Hentai/ExHentai site and API urls.
+
+ Urls missing from the "network"/"delays" setting get DEFAULT_DELAY and the
+ mapping is written back with hpx.update_setting.
+ """
+ # set default delay values if not set
+ delays = hpx.get_setting("network", "delays", {})
+ for u in (URLS['ex'], URLS['eh'], "https://api.e-hentai.org", URLS['ex_api']):
+ if u not in delays:
+ log.info(f"Setting delay on {u} requests to {DEFAULT_DELAY}")
+ delays[u] = DEFAULT_DELAY
+ hpx.update_setting("network", "delays", delays)
+
+@hpx.attach("Download.info")
+def eh_download_info():
+ return hpx.command.DownloadInfo(
+ identifier = EH_IDENTIFIER,
+ name = "E-Hentai",
+ parser = website_url_regex_gen("e-hentai.org", path_regex=r"g\/[0-9]{3,10}\/[0-9a-zA-Z]{3,15}", trailing_slash=True, variable_tld=False, trailing_fragment=True, end=True),
+ sites = ("https://e-hentai.org",),
+ description = "Download manga and doujinshi from e-hentai.org",
+ )
+
+@hpx.attach("Download.info")
+def ex_download_info():
+ return hpx.command.DownloadInfo(
+ identifier = EX_IDENTIFIER,
+ name = "ExHentai",
+ parser = website_url_regex_gen("exhentai.org", path_regex=r"g\/[0-9]{3,10}\/[0-9a-zA-Z]{3,15}", trailing_slash=True, variable_tld=False, trailing_fragment=True, end=True),
+ sites = ("https://exhentai.org",),
+ description = "Download manga and doujinshi from exhentai.org",
+ )
+
+@hpx.attach("Download.query", trigger=EH_IDENTIFIER)
+def eh_download_query(item):
+ return download_query(item, False)
+
+@hpx.attach("Download.query", trigger=EX_IDENTIFIER)
+def ex_download_query(item):
+ return download_query(item, True)
+
+def download_query(item, is_exhentai):
+ """
+ Called to query for resource URLs that should be downloaded.
+ Note that HPX will handle the actual downloading part.
+ The attached handler should just return all the URLs that should be downloaded in the form of :class:`DownloadRequest` objects
+
+ item is the download item being queried: item.url is parsed for the gallery id and token,
+ and item.name is overwritten with the gallery title (prefixed with a reason when the archive can't be fetched).
+ is_exhentai selects the ExHentai site/API/archiver urls instead of the E-Hentai ones.
+
+ should return:
+ a tuple of :class:`DownloadRequest` for all the URL resources that should be downloaded.
+ An empty tuple is returned when no login session could be obtained or the metadata request failed.
+ Note that the download system is recursive, so if the URL resource matches a download handler (the same or a different one),
+ That handler will be called upon with a new :class:`DownloadItem` for that particular URL
+ (though only once, meaning, no handler will be called upon again with the exact same URL during a single session)
+ """
+
+ gid, gtoken = parse_url(item.url)
+ download_requests = []
+
+ #===============================================================================
+ # get ehentai login
+ login_site = URLS['ex'] if is_exhentai else URLS['eh']
+ login_status = hpx.command.GetLoginStatus(login_site)
+ login_session = None
+ if login_status:
+ login_session = hpx.command.GetLoginSession(login_site)
+ if not login_session:
+ log.warning("unable to get a login sesion for querying gallery data")
+ return ()
+ log.info("logged in, attempting to download archive")
+
+ #===============================================================================
+ # get the gallery metadata which should have the archive key
+ # https://ehwiki.org/wiki/API#Gallery_Metadata
+ log.info("getting archiver key")
+ eh_data = {
+ 'method': 'gdata',
+ 'gidlist': [[gid, gtoken]],
+ }
+ req_props = hpx.command.RequestProperties(
+ headers=HEADERS,
+ json=eh_data,
+ session=login_session
+ )
+ api_url = URLS['ex_api' if is_exhentai else 'e_api']
+ log.info(f"requesting with api url {api_url}")
+ r = hpx.command.SinglePOSTRequest().request(api_url, req_props)
+ if not r.ok:
+ log.warning(f"got invalid metadata page or bad status: {r.status_code}")
+ log.debug(r.text)
+ return ()
+ try:
+ response = r.json
+ except json.JSONDecodeError:
+ response = None
+ log.info("got empty response when trying to retrieve archiver key, this usually means that user has no access to exhentai")
+ return ()
+ if not response or 'error' in response:
+ log.warning("response has an error of some sort, and so we have no archive key to use")
+ log.debug(r.text)
+ return ()
+
+ #===============================================================================
+ # Read the metadata of the gallery to fill out the download queue item
+ #
+ # While in theory we should only ever have a single entry in the response, best keep it more general just in case this gets retrofitted to handle multiple urls at once
+ # The information we reliably get is the thumbnail url and the title of the gallery
+ # We seem to also always get an archive key, but the key is not always valid, and so the archive url request can fail
+ #
+ # Yes, there is an expunged flag in the metadata, but it is always false when the gallery/archive is not available
+ # It is also true sometimes and yet the gallery/archive is totally accessible and so is meaningless
+ for gdata in response['gmetadata']:
+ archive_req = False
+ try:
+ if 'title' in gdata:
+ item.name = gdata['title']
+ if 'thumb' in gdata:
+ download_requests.append(
+ DownloadRequest(
+ downloaditem=item,
+ url=gdata['thumb'],
+ is_thumbnail=True,
+ properties=hpx.command.RequestProperties(method=hpx.Method.GET, headers=HEADERS, session=login_session), # we need to use the same session
+ ))
+ if 'archiver_key' in gdata:
+ log.info(f"found archiver key for gallery {(gid, gtoken)}")
+ a_key = gdata['archiver_key']
+ a_url = URLS['ex_archiver' if is_exhentai else 'e_archiver'].format(gallery_id=gid, gallery_token=gtoken, archiver_key=a_key)
+ # submitting this form asks the archiver for the original-quality archive
+ form_data = {
+ "dltype": "org", #original quality, instead of a resampled version
+ "dlcheck": "Download Original Archive"
+ }
+ req_props = hpx.command.RequestProperties(
+ headers=HEADERS,
+ data=form_data,
+ session=login_session
+ )
+ r = hpx.command.SinglePOSTRequest().request(a_url, req_props)
+ if r.ok:
+ # NOTE(review): the name prefixes below concatenate onto item.name, which is only set above when 'title' is present - confirm it can never be None here
+ if "Insufficient funds" in r.text:
+ log.info("Unable to grab gallery archive due to insufficent funds (GP) on the account")
+ item.name = "(Insufficient GP) "+item.name
+ elif "Key missing, or incorrect key provided" not in r.text:
+ # the archive link is the anchor inside <p id="continue"> of the returned page
+ soup = BeautifulSoup(r.text, "html.parser")
+ dp_url = soup.find("p", id="continue")
+ if dp_url and dp_url.a: # finally
+ download_requests.append(
+ DownloadRequest(
+ downloaditem=item,
+ url=dp_url.a['href'] + '?start=1',
+ properties=hpx.command.RequestProperties(method=hpx.Method.GET, headers=HEADERS, session=login_session), # we need to use the same session
+ filename=item.name.strip()+'.zip'))
+ archive_req = True
+ log.debug(f"adding the archive url {download_requests[-1].url}")
+ if not archive_req:
+ log.info("Something went wrong and we did not actually find a URL")
+ #TODO Actually better handle the various cases of why we do not have a url
+ else:
+ log.warning(f"got invalid key page or bad status: {r.status_code}")
+ if r.status_code == 404 and "This gallery is currently unavailable" in r.text:
+ #We know that there is a valid key for us to get here, so the gallery existed at some point in the past
+ #This seems like it is most of the time a copyright takedown, but I have no idea why this is not marked as expunged
+ item.name = "(Gallery Unavailable) "+item.name
+ else:
+ log.warning(f"didn't find archiver key for data: {eh_data}")
+ item.name = "(Archive Unavailable) "+item.name
+ except Exception as e:
+ # log the body of the most recent response for debugging, then let the error propagate
+ log.debug(f"got an error, last request content: \n\t {r.text}")
+ raise
+
+ if not archive_req:
+ pass
+ # TODO: download individual images instead
+
+ if download_requests:
+ log.info(f"was able to prepare {len(download_requests)} requests")
+ else:
+ log.info("unable to prepare any URLs to download")
+ return tuple(download_requests)
+
+@hpx.attach("Download.done", trigger=[EX_IDENTIFIER, EH_IDENTIFIER])
+def download_done(result):
+ """
+ Called when downloading of all :class:`DownloadRequest` for a specific :class:`DownloadItem` has finished.
+ The handler should do any post-processing here (archive files, rename files or folders, delete extranous files and etc.).
+ Remember to set the `status` property on the :class:`DownloadResult` object to `False` if the post-processing was a failure.
+ Note that the handler should *not* import the file into HPX (if it's an item), that part will be taken care of by HPX
+
+ should return:
+ the same :class:`DownloadResult` that was provided to the handler, potentially modified on the 'path' or `status` and `reason` properties
+ """
+ # there's nothing special to post-process in the case of e(x)hentai downloader, so just return the result as is
+ log.info(f"download of archive was successful for {result.downloaditem.name}")
+ #TODO Mark it as a failure if there was only a thumbnail to download
+ #TODO Archive the individual images together into a cbz or something if we grabbed individual images
+ return result
+
+def parse_url(url):
+ "Parses url into a tuple of gallery id and token"
+ gallery_id = None
+ gallery_token = None
+
+ gallery_id_token = regex.search('(?<=g/)([0-9]+)/([a-zA-Z0-9]+)', url)
+ if gallery_id_token:
+ gallery_id_token = gallery_id_token.group()
+ gallery_id, gallery_token = gallery_id_token.split('/')
+ else:
+ log.warning("Error extracting g_id and g_token from url: {}".format(url))
+ return int(gallery_id), gallery_token
diff --git a/plugins/EHentai Downloader/readme.md b/plugins/EHentai Downloader/readme.md
new file mode 100644
index 0000000..36198e7
--- /dev/null
+++ b/plugins/EHentai Downloader/readme.md
@@ -0,0 +1,22 @@
+EHentai Downloader
+----------------------------
+
+> A plugin that enables downloading manga and doujinshi from E-Hentai & ExHentai
+
+**IMPORTANT:** To download using GP/credits, the plugin [EHentai Login](https://github.com/happypandax/plugins/tree/master/plugins/EHentai%20Login) is required to be present
+
+**IMPORTANT:** Only downloading through ehentai's archiver system (which costs GP/credits) is supported at this time
+
+## Configuration
+
+There are no configuration options available for this plugin
+
+## Things yet to be implemented
+
+- Support scraping the individual images
+- Torrents (waiting for HPX to support this)
+
+# Changelog
+
+- `1.0.0`
+ - first version
\ No newline at end of file
diff --git a/plugins/EHentai Downloader/test.py b/plugins/EHentai Downloader/test.py
new file mode 100644
index 0000000..36a317e
--- /dev/null
+++ b/plugins/EHentai Downloader/test.py
@@ -0,0 +1 @@
+# test.py
\ No newline at end of file
diff --git a/plugins/EHentai Login/hplugin.json b/plugins/EHentai Login/hplugin.json
new file mode 100644
index 0000000..f35d421
--- /dev/null
+++ b/plugins/EHentai Login/hplugin.json
@@ -0,0 +1,16 @@
+{
+ "id": "d9b1d111-7250-4083-9efb-356fabbeada7",
+ "shortname": "ehentai-login",
+ "name": "EHentai Login",
+ "version": "1.1.0",
+ "description": "A plugin that can login to E-Hentai & ExHentai",
+ "author": "Twiddly",
+ "site_folder": "site/",
+ "update_url": "https://github.com/happypandax/plugins/tree/master/plugins/EHentai%20Login",
+ "website": "https://github.com/happypandax/plugins/tree/master/plugins/EHentai%20Login",
+ "entry": "main.py",
+ "test": "test.py",
+ "require": [
+ "happypandax >= 0.10.0"
+ ]
+}
diff --git a/plugins/EHentai Login/main.py b/plugins/EHentai Login/main.py
new file mode 100644
index 0000000..7563dbe
--- /dev/null
+++ b/plugins/EHentai Login/main.py
@@ -0,0 +1,191 @@
+# main.py
+import __hpx__ as hpx
+import pickle
+import os
+
+from bs4 import BeautifulSoup
+
+log = hpx.get_logger("main")
+
+
+
+# module-level login state, (re)set by login() and read by the Login.* handlers
+current_user_name = ""
+status_text = ""
+response = None
+user_dict = None
+
+# file in the plugin directory where the submitted login details are pickled
+save_file = os.path.join(hpx.constants.current_dir, '.info')
+
+# value written to the "network"/"delays" setting for sites that have none (presumably seconds - confirm)
+default_delay = 8
+
+# headers sent with the login check requests
+HEADERS = {'user-agent':"Mozilla/5.0 (Windows NT 6.3; rv:36.0) Gecko/20100101 Firefox/36.0"}
+
+match_url_prefix = r"^(http\:\/\/|https\:\/\/)?(www\.)?" # http:// or https:// + www.
+match_url_end = r"\/?$"
+
+url_regex = match_url_prefix + r"((exhentai|(g\.)?e-hentai)\.org)" + match_url_end
+
+MAIN_URLS = {
+ 'eh': "https://e-hentai.org",
+ 'ex': "https://exhentai.org"
+}
+
+URLS = MAIN_URLS
+URLS.update({
+ 'login': "https://e-hentai.org/home.php"
+})
+
+@hpx.subscribe("init")
+def inited():
+ """
+ Sets default request delays for E-Hentai/ExHentai and re-logs in with previously saved login details.
+
+ Sites missing from the "network"/"delays" setting get default_delay.
+ If save_file exists, the login details pickled in it are passed to login().
+ """
+ # set default delay values if not set
+ delays = hpx.get_setting("network", "delays", {})
+ for u in (MAIN_URLS['ex'], MAIN_URLS['eh']):
+ if u not in delays:
+ log.info(f"Setting delay on {u} requests to {default_delay}")
+ delays[u] = default_delay
+ hpx.update_setting("network", "delays", delays)
+
+ # retrieve saved user info
+ # NOTE(review): pickle.load executes arbitrary code from the file; this is only safe while save_file is written solely by this plugin
+ if os.path.exists(save_file):
+ with open(save_file, "rb") as f:
+ user_dict = pickle.load(f)
+ if user_dict:
+ login(user_dict, {})
+ if response is not None:
+ log.info("Successfully re-logged in")
+
+@hpx.subscribe("disable")
+def disabled():
+ save_user_dict()
+
+@hpx.subscribe("remove")
+def removed():
+ pass
+
+@hpx.attach("Login.info")
+def login_info():
+ return hpx.command.LoginInfo(
+ identifier = "ehentai",
+ name = "EHentai",
+ parser = url_regex,
+ sites = ("www.e-hentai.org", "www.exhentai.org"),
+ description = "Login to E-Hentai & ExHentai",
+ )
+
+@hpx.attach("Login.login", trigger="ehentai")
+def login(userpass, options):
+ """
+ Attempts to log in to E-Hentai (and optionally ExHentai) with cookie values.
+
+ userpass is a dict; the keys read are 'ipb_member_id', 'ipb_pass_hash',
+ 'additional' (comma-separated key=value pairs added as extra cookies)
+ and 'exhentai' (whether ExHentai access should be checked too, defaults to True).
+ options is not used.
+
+ The module-level user_dict, response, current_user_name and status_text are updated.
+ Returns the module-level response, which is None unless it was set during this call.
+ """
+ global current_user_name
+ global status_text
+ global response
+ global user_dict
+
+ # remember the submitted details and reset the state of any previous login
+ user_dict = userpass
+ response = None
+ current_user_name = ""
+
+ ipb_member = userpass.get('ipb_member_id', "")
+ ipb_pass = userpass.get('ipb_pass_hash', "")
+ try:
+ if not ipb_member or not ipb_pass:
+ raise ValueError("Missing ipb_member_id or ipb_pass_hash")
+
+ cookies = {}
+
+ additional = userpass.get('additional', "")
+ if additional:
+ try:
+ additional = {k.strip():v.strip() for k, v in [x.strip().split('=', 1) for x in additional.split(',')] }
+ cookies.update(additional)
+ # NOTE(review): bare except also catches KeyboardInterrupt/SystemExit; a pair without '=' raises ValueError in the unpacking above
+ except:
+ raise ValueError("Failed to parse additional values")
+
+ # the explicit member id / pass hash override same-named keys from 'additional'
+ cookies.update({
+ 'ipb_member_id': ipb_member,
+ 'ipb_pass_hash': ipb_pass,
+ })
+
+ # prepare request
+ req_props = hpx.command.RequestProperties(
+ session=True,
+ cookies=cookies,
+ headers=HEADERS
+ )
+
+ req = hpx.command.SingleGETRequest()
+
+ # check ehentai.org/home.php
+ r = req.request(URLS['login'], req_props)
+
+ if r.ok:
+ bad_access, msg = check_access(r)
+ status_text = msg
+ if not bad_access:
+ if userpass.get("exhentai", True):
+ # check exhentai
+ req_props.session = r.session
+ r = req.request(URLS['ex'], req_props)
+ if r.ok:
+ bad_access, status_text = check_access(r, ex=True)
+ else:
+ status_text = "Could not access ExHentai"
+
+ response = r
+
+ current_user_name = ipb_member
+ save_user_dict()
+
+ else:
+ status_text = r.reason
+
+ except ValueError as e:
+ # validation problems are reported through the status text rather than raised
+ status_text = str(e)
+
+ return response
+
+@hpx.attach("Login.status", trigger="ehentai")
+def status(options):
+ return status_text
+
+@hpx.attach("Login.logged_in", trigger="ehentai")
+def logged_in(options):
+ if response:
+ return True
+ return False
+
+@hpx.attach("Login.response", trigger="ehentai")
+def response_(options):
+ return response
+
+@hpx.attach("Login.current_user", trigger="ehentai")
+def current_user(options):
+ return current_user_name
+
+def check_access(r, ex=False):
+ msg = ""
+ bad_access = False
+ content_type = r.headers['content-type']
+ text = r.text
+ if 'image/gif' in content_type:
+ msg = "No access to ExHentai"
+ elif 'text/html' and 'Your IP address has been' in text:
+ msg = text
+ bad_access = True
+
+ if not bad_access and not ex:
+ soup = BeautifulSoup(text, "html.parser")
+ if soup.find("div", class_="homebox"): # we have access to home.php
+ pass
+ elif soup.find("form"): # login page
+ bad_access = True
+ msg = "Wrong credentials!"
+ if msg:
+ log.info(f"MSG: {msg}")
+ return bad_access, msg
+
+def save_user_dict():
+ global user_dict
+
+ # save user info
+ if user_dict:
+ with open(save_file, "wb") as f:
+ user_dict = pickle.dump(user_dict, f)
diff --git a/plugins/EHentai Login/readme.md b/plugins/EHentai Login/readme.md
new file mode 100644
index 0000000..7258ebc
--- /dev/null
+++ b/plugins/EHentai Login/readme.md
@@ -0,0 +1,22 @@
+EHentai Login
+----------------------------
+
+> This plugin can log-in to E-Hentai & ExHentai
+
+To login, go to this plugin's site through HPX.
+The plugin site can be accessed through a webclient, or at `[webserver-host]/plugin/d9b1d111-7250-4083-9efb-356fabbeada7`.
+Replace `[webserver-host]` with wherever your HPX webserver is hosted at.
+
+# Changelog
+
+- `1.1.0`
+ - Added a new field for additional cookie values
+
+- `1.0.0`
+ - Updated to reflect new changes in HPX v0.10.0
+
+- `0.2.0b`
+ - increase default delay limit on EH requests to `9` from `4` secs, this value can be tweaked in `network.delays` inside your `config.yaml`
+
+- `0.1.0b`
+ - first version
diff --git a/plugins/EHentai Login/site/index.html b/plugins/EHentai Login/site/index.html
new file mode 100644
index 0000000..6d463c0
--- /dev/null
+++ b/plugins/EHentai Login/site/index.html
@@ -0,0 +1,62 @@
+{% extends __default__ %}
+
+{% block include_javascript %}
+
+{% endblock %}
+
+{% block title %}
+EHentai Login Plugin
+{% endblock %}
+
+{% block before_content_form %}
+
Toggle the ExHentai option before logging in to also check for ExHentai access.
+The user has access to both E-Hentai and ExHentai if no message is displayed on a successful log-in. Otherwise the message No access to ExHentai will be displayed.
+
+
To find your IPB Member ID and IPB Pass Hash, follow these steps (should work on all browsers):
+
+ - Navigate to e-hentai.org (needs to be logged in) or exhentai.org
+ - Right click on page => Inspect element
+ - Go to the Console tab
+ - Write: document.cookie
+ - A line of values should appear that correspond to active cookies
+ - Look for the ipb_member_id, ipb_pass_hash and other values
+
+
+{% endblock %}
+
+{% block outer_content_form %}
+
+{% endblock %}
diff --git a/plugins/EHentai Login/site/script.js b/plugins/EHentai Login/site/script.js
new file mode 100644
index 0000000..83b68b2
--- /dev/null
+++ b/plugins/EHentai Login/site/script.js
@@ -0,0 +1,124 @@
+const login_identifer = "ehentai"
+
+function main() {
+ check_login(true)
+}
+
+function set_err_msg(msg) {
+ document.querySelector("#error-msg").innerHTML = msg
+}
+
+function form_status(cls, msg) {
+ switch (cls) {
+ case 'success':
+ document.querySelector("form").classList.add("success")
+ document.querySelector("form").classList.remove("error")
+ document.querySelector("form").classList.remove("warning")
+ break
+ case 'error':
+ if (!document.querySelector("#error-msg").innerHTML)
+ set_err_msg("Failed to login: " + msg.toString())
+ document.querySelector("form").classList.add("error")
+ document.querySelector("form").classList.remove("success")
+ document.querySelector("form").classList.remove("warning")
+ break;
+ case 'warning':
+ document.querySelector("form").classList.toggle("warning")
+ break;
+ case 'loading':
+ document.querySelector("form").classList.add("loading")
+ break;
+ case '!loading':
+ document.querySelector("form").classList.remove("loading")
+ break;
+ }
+}
+
+function check_login(first_time) {
+ form_status("loading")
+ hpx.call_function(
+ "get_login_info",
+ {identifier: login_identifer},
+ data => {
+ let fdata = data.data
+ form_status("!loading")
+ if (fdata) {
+ if (fdata.logged_in) {
+ form_status("success")
+ if (fdata.status.toLowerCase().indexOf("exhentai") !== -1)
+ form_status("warning")
+ }
+ else
+ if (!first_time){
+ set_err_msg(fdata.status)
+ form_status("error")
+ }
+ } else {
+ if (!first_time) {
+ form_status("error", fdata.status)
+ }
+ }
+ })
+}
+
+function on_login(e) {
+ e.preventDefault()
+ let arr = serialize_form(e.target)
+ let data = {
+ exhentai: false
+ }
+ for (var i in arr) {
+ let x = arr[i]
+ if ( ['ipb_member_id', 'ipb_pass_hash', 'additional'].includes(x.name)) {
+ data[x.name] = x.value
+ } else if (x.name == 'exhentai')
+ data[x.name] = (x.value == 'on') ? true : false
+ }
+ if (data.ipb_member_id && data.ipb_pass_hash) {
+ hpx.call_function(
+ "submit_login",
+ {
+ identifier:login_identifer,
+ credentials: data,
+ })
+
+ // submit_login is an async function so delay abit before checking if the login was successful
+ // a better solution is to actually poll the command and get the result when finished, but ain't nobody got time for that
+ form_status("loading")
+ setTimeout(check_login, 4000)
+ } else {
+ form_status('error')
+ }
+}
+
+// Run main() as soon as the DOM is ready, whichever way the browser reports that.
+// in case the document is already rendered
+if (document.readyState!='loading') main();
+// modern browsers
+else if (document.addEventListener) document.addEventListener('DOMContentLoaded', main);
+// IE <= 8
+else document.attachEvent('onreadystatechange', function(){
+ if (document.readyState=='complete') main();
+});
+
+// Serialize form data into an array
+function serialize_form(form) {
+ var field, l, s = [];
+ if (typeof form == 'object' && form.nodeName == "FORM") {
+ var len = form.elements.length;
+ for (var i=0; i= 0.10.0",
+ "ehentai-login"
+ ]
+}
\ No newline at end of file
diff --git a/plugins/EHentai Metadata/main.py b/plugins/EHentai Metadata/main.py
new file mode 100644
index 0000000..9f32d40
--- /dev/null
+++ b/plugins/EHentai Metadata/main.py
@@ -0,0 +1,593 @@
+# main.py
+import __hpx__ as hpx
+import regex
+import arrow
+import datetime
+import os
+import urllib
+import html
+
+from bs4 import BeautifulSoup
+from PIL import Image, ImageChops
+
+# Logger used throughout this module
+log = hpx.get_logger("main")
+
+# Regex fragment matching the optional scheme and optional "www." at the start of a URL
+MATCH_URL_PREFIX = r"^(http\:\/\/|https\:\/\/)?(www\.)?" # http:// or https:// + www.
+# Regex fragment matching an optional trailing slash at the end of a URL
+MATCH_URL_END = r"\/?$"
+
+# NOTE(review): presumably the default delay applied to requests (unit not visible here) — confirm against its use
+DEFAULT_DELAY = 8
+
+URLS_REGEX = {
+ 'eh_gallery': MATCH_URL_PREFIX + r"((? 8: # check if exceeds 8 keywords retry with quotes around title
+ r = title_search(f'"{title}"', ex, session, _times=_times+1)
+
+ return r
+
+def eh_page_results(eh_page_url, limit=None, session=None):
+    """
+    Opens eh page, parses for results, and then returns list of (title, url)
+
+    Args:
+        eh_page_url: url of the result page to fetch.
+        limit: maximum number of results to collect, defaults to the
+            ``gallery_results_limit`` plugin option when None.
+        session: optional session object to attach to the request.
+
+    Returns:
+        A list of ``(title, url)`` tuples, empty when nothing was found.
+    """
+    if limit is None:
+        limit = PLUGIN_CONFIG.get("gallery_results_limit")
+
+    # prepare request
+    req_props = hpx.command.RequestProperties(
+        headers=HEADERS,
+    )
+    if session:
+        req_props.session = session
+        log.debug(f"COOKIES: {session.cookies.keys()}")
+    r = hpx.command.SingleGETRequest().request(eh_page_url, req_props)
+    soup = BeautifulSoup(r.text, "html.parser")
+
+    # the display-mode selector decides which markup holds the results;
+    # keep the "compact" default when the selector or its selected option is missing
+    list_style = "compact"
+    dmi_div = soup.find("div", id="dms")
+    if dmi_div:
+        selected = dmi_div.find("option", selected=True)
+        if selected and selected.string:
+            list_style = selected.string.lower()
+
+    # display mode -> (tag, css classes) of the element that wraps the title link
+    title_cells = {
+        "compact": ("td", "gl3c glname"),
+        "minimal": ("td", "gl3m glname"),
+        "extended": ("div", "gl4e glname"),
+        "thumbnail": ("div", "gl4t glname"),
+    }
+    results = []
+    if list_style in title_cells:
+        tag, css_class = title_cells[list_style]
+        results = soup.find_all(tag, class_=css_class, limit=limit)
+
+    # cells without a link are skipped instead of raising on `x.a` being None
+    found_urls = [(str(x.a.string), x.a['href']) for x in results if x.a and x.a.get('href')] # title, url
+
+    if not found_urls:
+        log.debug(f"HTML: {r.text}")
+    return found_urls
+
+def parse_url(url):
+    """
+    Parses url into a tuple of gallery id and token
+
+    Returns:
+        ``(gallery_id, gallery_token)`` with the id converted to ``int``.
+        When the url has no ``g/<id>/<token>`` part a warning is logged and
+        ``(None, None)`` is returned.
+    """
+    gallery_id_token = regex.search('(?<=g/)([0-9]+)/([a-zA-Z0-9]+)', url)
+    if not gallery_id_token:
+        # the original fell through to int(None) here and died with a TypeError
+        # right after logging the warning
+        log.warning("Error extracting g_id and g_token from url: {}".format(url))
+        return None, None
+
+    gallery_id, gallery_token = gallery_id_token.groups()
+    return int(gallery_id), gallery_token
+
+def apply(datatuple):
+    """
+    Called to fetch and apply metadata to the given data items.
+    Remember to set the `status` property on the :class:`MetadataResult` object to `True` on a successful fetch.
+
+    The galleries of all items are requested in a single API call; each item
+    must provide ``data['gallery']`` (gallery id and token) and, when the
+    ``add_gallery_url`` option is on, ``data['gallery_url']``.
+
+    Returns:
+        A tuple of :class:`MetadataResult` objects. When the request fails or
+        returns no data, every item gets a failed result instead of being
+        left out of the returned tuple.
+    """
+    mresults = []
+    eh_data = {
+        'method': 'gdata',
+        'gidlist': [],
+        'namespace': 1
+    }
+
+    mdata_map = {} # (gid,token):metadatadata and gid:metadatadata
+
+    for d in datatuple:
+        gallery = d.data['gallery']
+        eh_data['gidlist'].append(gallery)
+        mdata_map[tuple(gallery)] = d
+        mdata_map[gallery[0]] = d # used for when token is invalid, assumes that there's no duplicate gid's
+
+    # prepare request
+    req_props = hpx.command.RequestProperties(
+        headers=HEADERS,
+        json=eh_data
+    )
+    r = hpx.command.SinglePOSTRequest().request(URLS['api'], req_props)
+    response = r.json if r.ok else None
+
+    if response and 'error' not in response:
+        add_gallery_url = PLUGIN_CONFIG.get('add_gallery_url', True)
+        for gdata in response['gmetadata']:
+            if 'error' in gdata:
+                # only the gid is reliable when the API rejects an entry
+                mdata = mdata_map[gdata['gid']]
+                mresults.append(hpx.command.MetadataResult(data=mdata, status=False, reason=gdata['error']))
+            else:
+                mdata = mdata_map[(gdata['gid'], gdata['token'])]
+                # a fresh list per gallery, format_metadata stores it in its result
+                urls_to_apply = []
+                if add_gallery_url:
+                    urls_to_apply.append(mdata.data['gallery_url'])
+                fdata = format_metadata(gdata, mdata.metadataitem.item, urls_to_apply=urls_to_apply)
+                applied = apply_metadata(fdata, mdata.metadataitem.item, mdata.options)
+                mresults.append(hpx.command.MetadataResult(data=mdata, status=applied, reason="No data was applied" if not applied else ""))
+    elif response:
+        # the API answered with a top-level error that concerns every item
+        log.warning(response)
+        for d in datatuple:
+            mresults.append(hpx.command.MetadataResult(data=d, status=False, reason=response['error']))
+    else:
+        # request failed or came back empty: report it for every item
+        log.warning("Metadata request failed or returned no data")
+        for d in datatuple:
+            mresults.append(hpx.command.MetadataResult(data=d, status=False, reason="No data was returned by the API"))
+
+    return tuple(mresults)
+
+def capitalize_text(text):
+    """
+    Capitalize every whitespace-separated word of text and join the words with single spaces
+    """
+    words = text.strip().split()
+    return " ".join(map(str.capitalize, words))
+
+def format_metadata(gdata, item, urls_to_apply=None):
+ """
+ Formats metadata to look like this for apply_metadata:
+ data = {
+ 'titles': None, # [(title, language),...]
+ 'artists': None, # [(artist, (circle, circle, ..)),...]
+ 'parodies': None, # [parody, ...]
+ 'category': None,
+ 'tags': None, # [tag, tag, tag, ..] or {ns:[tag, tag, tag, ...]}
+ 'pub_date': None, # DateTime object or Arrow object
+ 'language': None,
+ 'urls': None # [url, ...]
+ }
+
+ gdata is one gallery entry of the API response, the keys read here are
+ 'title', 'title_jpn', 'category', 'posted' and 'tags'.
+ item is accepted but not used inside this function.
+ urls_to_apply is stored under the 'urls' key when it is not empty.
+ Returns the populated dict; 'parodies', 'artists' and 'urls' are only set when there is data for them.
+ """
+ mdata = {}
+
+ mdata['titles'] = []
+
+ parsed_text = hpx.command.ItemTextParser(gdata['title'])
+
+ # prefer the title extracted by the parser, fall back to the raw title
+ parsed_title = parsed_text.extract_title()
+ if parsed_title:
+ parsed_title = parsed_title[0]
+ mdata['titles'].append((parsed_title or gdata['title'], 'english'))
+
+ mdata['titles'].append((gdata['title_jpn'], 'japanese'))
+
+
+ mdata['category'] = gdata['category']
+ mdata['pub_date'] = arrow.Arrow.fromtimestamp(gdata['posted'])
+
+ lang = "japanese" # default language
+
+ artists = set()
+ circles = set()
+ parodies = set()
+
+ parsed_artists = parsed_text.extract_artist()
+ parsed_circles = parsed_text.extract_circle()
+
+ # namespaces that are dropped from the tags when the 'remove_namespaces' option is on
+ extranous_namespaces = ("artist", "parody", "group", "language")
+ mdata['tags'] = {}
+ for nstag in gdata['tags']:
+
+ # NOTE(review): the option is re-read for every tag although it does not change inside the loop
+ blacklist_tags = PLUGIN_CONFIG.get("blacklist_tags")
+ if blacklist_tags and nstag in blacklist_tags:
+ continue
+
+ # split "namespace:tag", a tag without namespace keeps ns as None
+ ns = None
+ if ':' in nstag:
+ ns, t = nstag.split(':', 1)
+ else:
+ t = nstag
+
+ if ns == 'language' and t != 'translated':
+ lang = t
+ elif ns == "artist":
+ for a in parsed_artists: # the artist extracted from the title likely has better capitalization, so choose that instead
+ if a.lower() == t.lower():
+ artists.add(a)
+ break
+ # NOTE(review): indentation is not visible here; this else presumably pairs with the for loop (no parsed artist matched) — confirm
+ else:
+ artists.add(t)
+ elif ns == "group":
+ for c in parsed_circles: # the circle extracted from the title likely has better capitalization, so choose that instead
+ if c.lower() == t.lower():
+ circles.add(c)
+ break
+ # NOTE(review): presumably a for/else as well (no parsed circle matched) — confirm
+ else:
+ circles.add(t)
+ elif ns == "parody":
+ parodies.add(t)
+
+ if not (PLUGIN_CONFIG.get("remove_namespaces") and ns in extranous_namespaces):
+ mdata['tags'].setdefault(ns, []).append(t)
+ else:
+ log.debug(f"removing namespace {ns}")
+
+ log.debug(f"tags: {mdata['tags']}")
+
+ mdata['language'] = lang
+
+ if parodies:
+ mdata['parodies'] = parodies
+
+ if artists:
+ a_circles = []
+ for a in artists:
+ a_circles.append((a, tuple(circles))) # assign circles to each artist
+ mdata['artists'] = a_circles
+
+ if urls_to_apply:
+ mdata['urls'] = urls_to_apply
+
+ return mdata
+
+# Short module-level aliases for the hpx.command data classes
+GalleryData = hpx.command.GalleryData
+LanguageData = hpx.command.LanguageData
+TitleData = hpx.command.TitleData
+ArtistData = hpx.command.ArtistData
+ArtistNameData = hpx.command.ArtistNameData
+ParodyData = hpx.command.ParodyData
+ParodyNameData = hpx.command.ParodyNameData
+CircleData = hpx.command.CircleData
+CategoryData = hpx.command.CategoryData
+UrlData = hpx.command.UrlData
+NamespaceTagData= hpx.command.NamespaceTagData
+TagData= hpx.command.TagData
+NamespaceData = hpx.command.NamespaceData
+
+def apply_metadata(data, gallery, options):
+ """
+ Builds a GalleryData object from data and applies it to gallery through
+ hpx.command.UpdateItemData; the result of that call is returned.
+ Keys of data that are missing, empty or of an unexpected type are skipped.
+
+ data = {
+ 'titles': None, # [(title, language),...]
+ 'artists': None, # [(artist, (circle, circle, ..)),...]
+ 'parodies': None, # [parody, ...]
+ 'category': None,
+ 'tags': None, # [tag, tag, tag, ..] or {ns:[tag, tag, tag, ...]}
+ 'pub_date': None, # DateTime object or Arrow object
+ 'language': None,
+ 'urls': None # [url, ...]
+ }
+ """
+
+ log.debug(f"data: {data}")
+
+ gdata = GalleryData()
+
+ if isinstance(data.get('titles'), (list, tuple, set)):
+ gtitles = []
+ for t, l in data['titles']:
+ gtitle = None
+ # empty titles are skipped, html entities in the title are unescaped
+ if t:
+ t = html.unescape(t)
+ gtitle = TitleData(name=t)
+ if t and l:
+ gtitle.language = LanguageData(name=l)
+ if gtitle:
+ gtitles.append(gtitle)
+
+ if gtitles:
+ gdata.titles = gtitles
+ log.debug("applied titles")
+
+ if isinstance(data.get('artists'), (list, tuple, set)):
+ gartists = []
+ for a, c in data['artists']:
+ if a:
+ gartist = ArtistData(names=[ArtistNameData(name=capitalize_text(a))])
+ gartists.append(gartist)
+
+ # NOTE(review): indentation is lost in this view; this block presumably sits inside 'if a:',
+ # otherwise gartist could be unbound or belong to a previous artist — confirm
+ if c:
+ gcircles = []
+ for circlename in [x for x in c if x]:
+ gcircles.append(CircleData(name=capitalize_text(circlename)))
+ gartist.circles = gcircles
+
+ if gartists:
+ gdata.artists = gartists
+ log.debug("applied artists")
+
+ if isinstance(data.get('parodies'), (list, tuple, set)):
+ gparodies = []
+ for p in data['parodies']:
+ if p:
+ gparody = ParodyData(names=[ParodyNameData(name=capitalize_text(p))])
+ gparodies.append(gparody)
+
+ if gparodies:
+ gdata.parodies = gparodies
+ log.debug("applied parodies")
+
+ if data.get('category'):
+ gdata.category = CategoryData(name=data['category'])
+ log.debug("applied category")
+
+ if data.get('language'):
+ gdata.language = LanguageData(name=data['language'])
+ log.debug("applied language")
+
+ if isinstance(data.get('tags'), (dict, list)):
+ # a plain list is treated as tags without a namespace; note that this rewrites data['tags'] on the passed dict
+ if isinstance(data['tags'], list):
+ data['tags'] = {None: data['tags']}
+ gnstags = []
+ for ns, tags in data['tags'].items():
+ if ns is not None:
+ ns = ns.strip()
+ # the 'misc' namespace is handled like no namespace at all
+ if ns and ns.lower() == 'misc':
+ ns = None
+ for t in tags:
+ t = t.strip()
+ if t:
+ kw = {'tag': TagData(name=t)}
+ if ns:
+ kw['namespace'] = NamespaceData(name=ns)
+ gnstags.append(NamespaceTagData(**kw))
+
+ if gnstags:
+ gdata.tags = gnstags
+ log.debug("applied tags")
+
+ if isinstance(data.get('pub_date'), (datetime.datetime, arrow.Arrow)):
+ pub_date = data['pub_date']
+ gdata.pub_date = pub_date
+ log.debug("applied pub_date")
+
+ if isinstance(data.get('urls'), (list, tuple)):
+ gurls = []
+ for u in data['urls']:
+ if u:
+ gurls.append(UrlData(name=u))
+ if gurls:
+ gdata.urls = gurls
+ log.debug("applied urls")
+
+ applied = hpx.command.UpdateItemData(gallery, gdata, options=options)
+
+ log.debug(f"applied: {applied}")
+
+ return applied
\ No newline at end of file
diff --git a/plugins/EHentai Metadata/readme.md b/plugins/EHentai Metadata/readme.md
new file mode 100644
index 0000000..423daac
--- /dev/null
+++ b/plugins/EHentai Metadata/readme.md
@@ -0,0 +1,67 @@
+EHentai Metadata
+----------------------------
+
+> This plugin fetches metadata from E-Hentai & ExHentai
+
+**IMPORTANT:** This plugin requires the [EHentai Login](https://github.com/happypandax/plugins/tree/master/plugins/EHentai%20Login) plugin to be present
+
+## Configuration
+
+Configure this plugin by adding `ehentai-metadata` to the `plugin.config` namespace in your `config.yaml`:
+```yaml
+plugin:
+ config:
+ ehentai-metadata:
+ option1: True
+ option2:
+ - item 1
+ - item 2
+```
+
+#### Available options
+
+Name | Default | Description
+--- | --- | ---
+`filename_search` | `true` | use the filename/folder-name for searching instead of gallery title
+`expunged_galleries` | `false` | enable expunged galleries in results
+`remove_namespaces` | `true` | remove superfluous namespaces like 'artist', 'language' and 'group' because they are handled specially in HPX
+`gallery_results_limit` | `10` | maximum amount of galleries to return
+`blacklist_tags` | `[]` | tags to ignore when updating tags, a list of `namespace:tag` strings
+`add_gallery_url` | `true` | add ehentai url to gallery
+`preferred_language` | `"english"` | preferred gallery language (in gallery title) to extract from if multiple galleries were found, set empty string for default
+`enabled_categories` | `['manga', 'doujinshi', 'non-h', 'artistcg', 'gamecg', 'western', 'imageset', 'cosplay', 'asianporn', 'misc']` | categories that are enabled for the search
+`search_query` | `"{title}"` | the search query, '{title}' will be replaced with the gallery title, use double curly brackets to escape a curly bracket. Tip: if you want to only allow english results, you should modify this to "{title} language:english"
+`search_low_power_tags` | `true` | enable search low power tags
+`search_torrent_name` | `true` | enable search torrent name
+`search_gallery_description` | `false` | enable search gallery description
+
+## Things yet to be implemented
+
+- File similarity search
+
+# Changelog
+
+- `1.2.1`
+ - some misc. changes
+
+- `1.2.0`
+ - fixed title being quoted unconditionally
+ - retry the search with quoted title if keyword count exceeds 8
+
+- `1.1.0`
+ - added several new options and fixed some errors
+
+- `1.0.0`
+ - updated to reflect new changes in HPX v0.10.0
+
+- `0.4.0b`
+ - updated to work on new EH website design changes
+
+- `0.3.0b`
+ - add a default delay on `https://api.e-hentai.org/` requests, this value can be tweaked in `network.delays` inside your `config.yaml`
+
+- `0.2.0b`
+ - added `preferred_language` option
+
+- `0.1.0b`
+ - first version
\ No newline at end of file
diff --git a/plugins/EHentai Metadata/test.py b/plugins/EHentai Metadata/test.py
new file mode 100644
index 0000000..36a317e
--- /dev/null
+++ b/plugins/EHentai Metadata/test.py
@@ -0,0 +1 @@
+# test.py
\ No newline at end of file
diff --git a/plugins/File Metadata/extractors/__init__.py b/plugins/File Metadata/extractors/__init__.py
new file mode 100644
index 0000000..8951b5c
--- /dev/null
+++ b/plugins/File Metadata/extractors/__init__.py
@@ -0,0 +1,3 @@
+from . import eze
+from . import ehentaidownloader
+from . import hdoujin
\ No newline at end of file
diff --git a/plugins/File Metadata/extractors/common.py b/plugins/File Metadata/extractors/common.py
new file mode 100644
index 0000000..23b79c6
--- /dev/null
+++ b/plugins/File Metadata/extractors/common.py
@@ -0,0 +1,112 @@
+import __hpx__ as hpx
+import enum
+import json
+import typing
+
+log = hpx.get_logger(__name__)
+
+class IncompatibleFile(ValueError):
+    """Raised when a file could not be read or parsed by an extractor"""
+
+class DataType(enum.Flag):
+ """
+ The available extractors.
+ Add your new extractor here
+ """
+ # members are flags, so several extractors can be combined with '|' for one filename
+ eze = enum.auto()
+ hdoujin = enum.auto()
+ e_hentai_downloader = enum.auto()
+
+# The file extensions to look for, no duplicates, only add if necessary
+filetypes = ('.json', '.txt')
+# Which filename belongs to which extractor(s), use inclusive OR '|' to combine multiple extractors
+filenames = {
+ "info.json": DataType.eze | DataType.hdoujin,
+ "info.txt": DataType.hdoujin | DataType.e_hentai_downloader,
+}
+
+# Template of the dict that an extractor's extract method should populate
+common_data = {
+ 'titles': None, # [(title, language),...]
+ 'artists': None, # [(artist, (circle, circle, ..)),...]
+ 'parodies': None, # [parody, ...]
+ 'category': None,
+ 'tags': None, # [tag, tag, tag, ..] or {ns:[tag, tag, tag, ...]}
+ 'pub_date': None, # DateTime object or Arrow object
+ 'language': None,
+ 'urls': None # [url, ...]
+}
+
+# Plugin options with their default values
+plugin_config = {
+ 'characters_namespace': 'character', # hdoujin, which namespace to put the values in the CHARACTERS field in
+}
+
+# DataType member -> extractor instance, filled by register_extractor
+extractors = {}
+
+def capitalize_text(text):
+    """
+    Return text with each whitespace-separated word capitalized and the words separated by one space
+    """
+    capitalized = [word.capitalize() for word in text.split()]
+    return " ".join(capitalized)
+
+def register_extractor(cls, type):
+    """
+    Register an extractor for a data type.
+
+    An instance of cls is created and stored in the extractors dict under type.
+
+    Args:
+        cls: a subclass of Extractor.
+        type: the DataType member the extractor handles.
+
+    Raises:
+        TypeError: if cls is not an Extractor subclass or type is not a DataType.
+    """
+    # explicit checks rather than assert statements, which are removed when Python runs with -O
+    if not issubclass(cls, Extractor):
+        raise TypeError(f"{cls!r} is not a subclass of Extractor")
+    if not isinstance(type, DataType):
+        raise TypeError(f"{type!r} is not a DataType member")
+    extractors[type] = cls()
+
+class Extractor:
+    """
+    Base extractor
+    """
+
+    def file_to_dict(self, fs: hpx.command.CoreFS) -> typing.Union[dict, None]:
+        """
+        A subclass can choose to override or extend this method.
+        Should return a dict with data from the file which will be passed to the extract method.
+        If the file is not supported or should be skipped, return None.
+        The parameter fs is the file in question.
+
+        Below is convenience code to read and convert a file into a dict.
+        Supports json and txt files.
+        If file is a txt, will try to parse files like this:
+            Field A: value 1
+            Field B: value 2
+        ->
+            {
+                'Field A': 'value 1',
+                'Field B': 'value 2',
+            }
+
+        Raises:
+            IncompatibleFile: (a ValueError subclass) for every failure while
+                reading or parsing the file. This includes a txt line without
+                a ':' separator and a file that is neither json nor txt; the
+                original error is attached as the cause.
+        """
+        try:
+            d = {}
+            log.debug(f"File ext: {fs.ext}")
+            kw = {}
+            # an encoding is only passed for files that are not inside an archive
+            if not fs.inside_archive:
+                kw['encoding'] = 'utf-8'
+            ext = fs.ext.lower()
+            if ext == '.json':
+                with fs.open("r", **kw) as f:
+                    d = json.load(f)
+            elif ext == '.txt':
+                with fs.open("r", **kw) as f:
+                    for line in f.readlines():
+                        l = line.strip()
+                        if isinstance(l, bytes):
+                            l = l.decode(encoding="utf-8", errors="ignore")
+                        # a line without ':' raises ValueError and marks the file as incompatible
+                        k, v = l.split(':', 1)
+                        if k.strip():
+                            d[k.strip()] = v.strip()
+            else:
+                raise NotImplementedError(f"{fs.ext} filetype not supported yet")
+        except Exception as e: # Bad, I know, but too lazy
+            # chain the original error so the real reason stays visible in tracebacks
+            raise IncompatibleFile(e) from e
+        return d
+
+    def extract(self, filedata: dict) -> dict:
+        """
+        A subclass must implement this method.
+        Should populate a dict that looks like common_data (see above) and return it
+
+        filedata parameter is the dict created in the file_to_dict method
+        """
+        raise NotImplementedError
diff --git a/plugins/File Metadata/extractors/ehentaidownloader.py b/plugins/File Metadata/extractors/ehentaidownloader.py
new file mode 100644
index 0000000..3a02f95
--- /dev/null
+++ b/plugins/File Metadata/extractors/ehentaidownloader.py
@@ -0,0 +1,80 @@
+import __hpx__ as hpx
+from . import common
+
+log = hpx.get_logger(__name__)
+
+class EHentaiDownloader(common.Extractor):
+    """
+    Extractor for the info.txt files produced by E-Hentai Downloader
+    """
+
+    def file_to_dict(self, fs):
+        """
+        A subclass can choose to override or extend this method.
+        Should return a dict with data from the file which will be passed to the extract method.
+        If the file is not supported or should be skipped, return None.
+        The parameter fs is the file in question.
+
+        File is formatted weirdly so we just return {linenumber : line}
+
+        Raises:
+            common.IncompatibleFile: if the file is not valid utf-8.
+        """
+        d = {}
+        log.debug(f"File ext: {fs.ext}")
+        kw = {}
+        if not fs.inside_archive:
+            kw['encoding'] = "utf-8"
+        try:
+            with fs.open("r", **kw) as f:
+                for num, line in enumerate(f.readlines(), 1):
+                    if isinstance(line, bytes):
+                        line = line.decode("utf-8")
+                    d[num] = line
+        except UnicodeDecodeError as e:
+            # report an undecodable file the same way the base extractor does,
+            # so callers that handle IncompatibleFile can skip it
+            raise common.IncompatibleFile(e) from e
+
+        # confirm it's the right file, the signature is expected on the last line
+        if d and "generated by e-hentai downloader" not in d[len(d)].lower():
+            d = None
+        return d
+
+    def extract(self, filedata):
+        """
+        A subclass must implement this method.
+        Should populate a dict that looks like common_data (see common.py) and return it
+
+        filedata parameter is the dict created in the file_to_dict method
+        """
+        d = {}
+        if filedata:
+            log.debug("Expecting e-hentai downloader metadata file")
+            for linenum in sorted(filedata):
+                line = filedata[linenum].strip()
+                if not line:
+                    continue
+
+                if line.startswith("Language:"):
+                    words = line.split(':', 1)[1].lower().split()
+                    # an empty "Language:" field is ignored instead of raising IndexError
+                    if words:
+                        d['language'] = common.capitalize_text(words[0])
+                    continue
+
+                if line.startswith("Category:"):
+                    line = line.split(':', 1)[1]
+                    d['category'] = common.capitalize_text(line.lower())
+                    continue
+
+                if line.startswith("> "): # tags
+                    ns, sep, tags = line[2:].partition(':') # remove > and split namespace from tags
+                    # a tag line without ':' cannot be parsed, skip it instead of raising ValueError
+                    if sep:
+                        d.setdefault("tags", {})[ns.strip()] = [t.strip() for t in tags.split(",")]
+                    continue
+
+                if linenum in (1, 2, 3): # most likely a title or url, must be last because maybe it wasn't included
+                    # only trust the first three lines when the third one is a url
+                    if not filedata.get(3, "").startswith("http"):
+                        continue
+
+                    if linenum == 3:
+                        d.setdefault('urls', []).append(line)
+                    else:
+                        title_lang = "english" if linenum == 1 else "japanese"
+                        nameparser = hpx.command.ItemTextParser(line)
+                        parsed_title = nameparser.extract_title()
+                        d.setdefault("titles", []).append((parsed_title[0] if parsed_title else line, title_lang))
+                    continue
+        return d
+
+common.register_extractor(EHentaiDownloader, common.DataType.e_hentai_downloader)
diff --git a/plugins/File Metadata/extractors/eze.py b/plugins/File Metadata/extractors/eze.py
new file mode 100644
index 0000000..cf4be26
--- /dev/null
+++ b/plugins/File Metadata/extractors/eze.py
@@ -0,0 +1,70 @@
+import arrow
+import __hpx__ as hpx
+from . import common
+
+log = hpx.get_logger(__name__)
+
+class Eze(common.Extractor):
+    """
+    Extractor for eze metadata files
+    """
+
+    def file_to_dict(self, fs):
+        """
+        Read the file with the base implementation and check that it looks like an eze file.
+
+        The parsed dict is returned when it has a 'gallery_info' key and at least one of
+        'image_info' or 'gallery_info_full'; None is returned when those keys are missing.
+        An empty result of the base implementation is returned unchanged.
+        """
+        parsed = super().file_to_dict(fs)
+        if not parsed:
+            return parsed
+        # the required key
+        if 'gallery_info' not in parsed:
+            return None
+        # and one of the companion keys
+        has_companion_key = 'image_info' in parsed or 'gallery_info_full' in parsed
+        return parsed if has_companion_key else None
+
+    def extract(self, filedata):
+        """
+        Build a dict shaped like common_data (see common.py) from the 'gallery_info'
+        section of filedata, the dict created in the file_to_dict method.
+        An empty dict is returned when that section is missing or empty.
+        """
+        d = {}
+        ginfo = filedata.get('gallery_info')
+        if not ginfo:
+            return d
+
+        log.debug("Expecting eze metadata file")
+
+        mcat = ginfo.get("category")
+        if mcat:
+            d['category'] = common.capitalize_text(mcat)
+
+        titles = (
+            (ginfo.get('title'), "english"),
+            (ginfo.get('title_original'), "japanese"),
+        )
+        for text, lang in titles:
+            if not text:
+                continue
+            parsed_title = hpx.command.ItemTextParser(text).extract_title()
+            name = parsed_title[0] if parsed_title else text
+            d.setdefault("titles", []).append((name, lang))
+
+        mtags = ginfo.get("tags")
+        if mtags:
+            tags = {}
+            for namespace, values in mtags.items():
+                key = common.capitalize_text(namespace)
+                # the first namespace wins when two of them capitalize to the same name
+                if key not in tags:
+                    tags[key] = values
+            d['tags'] = tags
+
+        mlang = ginfo.get("language")
+        if mlang:
+            d['language'] = common.capitalize_text(mlang)
+
+        msource = ginfo.get('source')
+        if msource:
+            url = f"https://{msource['site']}.org/g/{msource['gid']}/{msource['token']}"
+            d.setdefault('urls', []).append(url)
+
+        mupdate = ginfo.get("upload_date")
+        if mupdate:
+            d['pub_date'] = arrow.get(*mupdate)
+
+        return d
+
+common.register_extractor(Eze, common.DataType.eze)
diff --git a/version/File Metadata/extractors.py b/plugins/File Metadata/extractors/hdoujin.py
similarity index 61%
rename from version/File Metadata/extractors.py
rename to plugins/File Metadata/extractors/hdoujin.py
index a2f1f4b..df2fc06 100644
--- a/version/File Metadata/extractors.py
+++ b/plugins/File Metadata/extractors/hdoujin.py
@@ -1,57 +1,17 @@
import __hpx__ as hpx
-import common
+from . import common
log = hpx.get_logger(__name__)
-class Eze(common.Extractor):
-
- def file_to_dict(self, fs):
- d = super().file_to_dict(fs)
- k = ('gallery_info', 'image_info')
- if d and not all(map(lambda x: x in d, k)): # make sure all keys are present
- d = None
- return d
-
- def extract(self, filedata):
- d = {}
- filedata = filedata.get('gallery_info')
- if filedata:
- log.debug("Expecting eze metadata file")
- mtitle = filedata.get('title')
- mtitle_jp = filedata.get('title_original')
-
- mcat = filedata.get("category")
- if mcat:
- d['category'] = common.capitalize_text(mcat)
-
- for t, l in ((mtitle, "english"), (mtitle_jp, "japanese")):
- if t:
- nameparser = hpx.command.NameParser(t)
- parsed_title = nameparser.extract_title()
- d.setdefault("titles", []).append((parsed_title[0] if parsed_title else t, l))
-
- mtags = filedata.get("tags")
-
- if mtags:
- d['tags'] = {}
- for ns, t in mtags.items():
- d['tags'].setdefault(common.capitalize_text(ns), t)
-
- mlang = filedata.get("language")
- if mlang:
- d['language'] = common.capitalize_text(mlang)
-
- msource = filedata.get('source')
- if msource:
- d.setdefault('urls', []).append(f"https://{msource['site']}.org/g/{msource['gid']}/{msource['token']}")
-
- return d
-
-common.register_extractor(Eze, common.DataType.eze)
-
class HDoujin(common.Extractor):
def file_to_dict(self, fs):
+ """
+ A subclass can choose to override or extend this method.
+ Should return a dict with data from the file which will be passed to the extract method.
+ If the file is not supported or should be skipped, return None.
+ The parameter fs is the file in question.
+ """
d = super().file_to_dict(fs)
if d:
if fs.ext.lower() == '.txt':
@@ -73,6 +33,12 @@ def file_to_dict(self, fs):
return d
def extract(self, filedata):
+ """
+ A subclass must implement this method.
+ Should populate a dict that looks like common_data (see common.py) and return it
+
+ filedata parameter is the dict created in the file_to_dict method
+ """
d = {}
if filedata:
log.debug("Expecting hdoujin metadata file")
@@ -81,7 +47,7 @@ def extract(self, filedata):
for t, l in ((mtitle, "english"), (mtitle_jp, "japanese")):
if t:
- nameparser = hpx.command.NameParser(t)
+ nameparser = hpx.command.ItemTextParser(t)
parsed_title = nameparser.extract_title()
d.setdefault("titles", []).append((parsed_title[0] if parsed_title else t, l))
@@ -113,7 +79,19 @@ def extract(self, filedata):
for ns, t in mtags.items():
d['tags'].setdefault(common.capitalize_text(ns), t)
else:
- d['tags'] = mtags
+ d['tags'] = {None: mtags} # None for no namespace
+
+ mcharacters = filedata.get("characters")
+ if mcharacters:
+ if isinstance(mcharacters, str):
+ mcharacters = mcharacters.split(',')
+ d.setdefault('tags', {})[common.plugin_config.get('characters_namespace') or 'characters'] = mcharacters
+
+ mparody = filedata.get("parody")
+ if mparody:
+ if isinstance(mparody, str):
+ mparody = mparody.split(',')
+ d['parodies'] = mparody
mlang = filedata.get("language")
if mlang:
@@ -127,4 +105,4 @@ def extract(self, filedata):
return d
-common.register_extractor(HDoujin, common.DataType.hdoujin)
+common.register_extractor(HDoujin, common.DataType.hdoujin)
\ No newline at end of file
diff --git a/plugins/File Metadata/hplugin.json b/plugins/File Metadata/hplugin.json
new file mode 100644
index 0000000..9defcf6
--- /dev/null
+++ b/plugins/File Metadata/hplugin.json
@@ -0,0 +1,12 @@
+{
+ "id": "e38e24e4-8ca8-420e-b52b-c75510097653",
+ "shortname": "file-metadata",
+ "name": "File Metadata",
+ "version": "2.0.2",
+ "description": "Extracts and applies metadata from a file accompanying a gallery. Supports files produced from eze, e-hentai-downloader and hdoujin",
+ "author": "Twiddly",
+ "update_url": "https://github.com/happypandax/plugins/tree/master/plugins/File%20Metadata",
+ "website": "https://github.com/happypandax/plugins/tree/master/plugins/File%20Metadata",
+ "entry": "main.py",
+ "require": ["happypandax >= 0.11.0"]
+}
diff --git a/plugins/File Metadata/main.py b/plugins/File Metadata/main.py
new file mode 100644
index 0000000..4e85c0f
--- /dev/null
+++ b/plugins/File Metadata/main.py
@@ -0,0 +1,275 @@
+import __hpx__ as hpx
+import os
+import arrow
+import datetime
+import html
+import extractors
+from extractors import common
+
+log = hpx.get_logger(__name__)
+
+options = {
+}
+
+def get_common_data(datatypes, fpath):
+    """
+    Try the extractors selected by datatypes on the file at fpath and return
+    the data of the first one that produces any.
+
+    datatypes is a common.DataType value (several members may be combined),
+    fpath is wrapped in a hpx.command.CoreFS before it is handed to the extractors.
+    An empty dict is returned when no extractor yields data.
+    """
+    assert isinstance(datatypes, common.DataType)
+    fs = hpx.command.CoreFS(fpath)
+
+    for datatype in common.DataType:
+        if not (datatype & datatypes):
+            continue
+        log.info(f"Attempting with {datatype}")
+
+        ex = common.extractors.get(datatype, None)
+        if not ex:
+            continue
+
+        try:
+            fdata = ex.file_to_dict(fs)
+        except common.IncompatibleFile as e:
+            log.info(f"Skipping incompatible file for {datatype}: {str(e)}")
+            continue
+
+        if not fdata:
+            log.info(f"{datatype} didn't match")
+            continue
+
+        log.info(f"{datatype} matched!")
+        md = dict(ex.extract(fdata))
+        if md:
+            # the first extractor that delivers data wins
+            return md
+    return {}
+
+# Short module-level aliases for the hpx.command classes used by apply_metadata
+SetValue = hpx.command.Set
+GalleryData = hpx.command.GalleryData
+LanguageData = hpx.command.LanguageData
+TitleData = hpx.command.TitleData
+ArtistData = hpx.command.ArtistData
+ArtistNameData = hpx.command.ArtistNameData
+ParodyData = hpx.command.ParodyData
+ParodyNameData = hpx.command.ParodyNameData
+CircleData = hpx.command.CircleData
+CategoryData = hpx.command.CategoryData
+UrlData = hpx.command.UrlData
+NamespaceTagData = hpx.command.NamespaceTagData
+TagData = hpx.command.TagData
+NamespaceData = hpx.command.NamespaceData
+
+def apply_metadata(data, gallery, options=None):
+    """
+    Build a GalleryData object from data and apply it to gallery through
+    hpx.command.UpdateItemData.
+
+    data = {
+        'titles': None, # [(title, language),...]
+        'artists': None, # [(artist, (circle, circle, ..)),...]
+        'parodies': None, # [parody, ...]
+        'category': None,
+        'tags': None, # [tag, tag, tag, ..] or {ns:[tag, tag, tag, ...]}
+        'pub_date': None, # DateTime object or Arrow object
+        'language': None,
+        'urls': None # [url, ...]
+    }
+    An optional 'times_read' key is applied as well when present.
+    Keys that are missing, empty or of an unexpected type are skipped.
+    The passed data dict is not modified.
+
+    options is forwarded to UpdateItemData, an empty dict is used when it is None.
+
+    Returns:
+        The result of hpx.command.UpdateItemData.
+    """
+    # a fresh dict per call instead of a default argument shared between calls
+    if options is None:
+        options = {}
+
+    log.debug(f"data: {data}")
+
+    gdata = GalleryData()
+
+    if isinstance(data.get('titles'), (list, tuple, set)):
+        gtitles = []
+        for t, l in data['titles']:
+            gtitle = None
+            if t:
+                gtitle = TitleData(name=html.unescape(t))
+                if l:
+                    gtitle.language = LanguageData(name=l)
+            if gtitle:
+                gtitles.append(gtitle)
+
+        if gtitles:
+            gdata.titles = SetValue(gtitles)
+            log.debug("applied titles")
+
+    if isinstance(data.get('artists'), (list, tuple, set)):
+        gartists = []
+        for a, c in data['artists']:
+            if not a:
+                # circles can only be attached to an artist, nothing to do without one
+                continue
+            gartist = ArtistData(names=[ArtistNameData(name=common.capitalize_text(a))])
+            gartists.append(gartist)
+            if c:
+                gartist.circles = [CircleData(name=common.capitalize_text(x)) for x in c if x]
+
+        if gartists:
+            gdata.artists = SetValue(gartists)
+            log.debug("applied artists")
+
+    if isinstance(data.get('parodies'), (list, tuple, set)):
+        gparodies = [
+            ParodyData(names=[ParodyNameData(name=common.capitalize_text(p))])
+            for p in data['parodies'] if p
+        ]
+
+        if gparodies:
+            gdata.parodies = SetValue(gparodies)
+            log.debug("applied parodies")
+
+    if data.get('category'):
+        gdata.category = SetValue(CategoryData(name=data['category']))
+        log.debug("applied category")
+
+    if data.get('language'):
+        gdata.language = SetValue(LanguageData(name=data['language']))
+        log.debug("applied language")
+
+    tags_by_ns = data.get('tags')
+    if isinstance(tags_by_ns, (dict, list)):
+        if isinstance(tags_by_ns, list):
+            # a plain list means tags without a namespace; kept in a local so the caller's dict stays untouched
+            tags_by_ns = {None: tags_by_ns}
+        gnstags = []
+        for ns, tags in tags_by_ns.items():
+            if ns is not None:
+                ns = ns.strip()
+            # the 'misc' namespace is handled like no namespace at all
+            if ns and ns.lower() == 'misc':
+                ns = None
+            for t in tags:
+                t = t.strip()
+                if t:
+                    kw = {'tag': TagData(name=t)}
+                    if ns:
+                        kw['namespace'] = NamespaceData(name=ns)
+                    gnstags.append(NamespaceTagData(**kw))
+
+        if gnstags:
+            gdata.tags = SetValue(gnstags)
+            log.debug("applied tags")
+
+    if isinstance(data.get('pub_date'), (datetime.datetime, arrow.Arrow)):
+        gdata.pub_date = SetValue(data['pub_date'])
+        log.debug("applied pub_date")
+
+    if isinstance(data.get('urls'), (list, tuple)):
+        gurls = [UrlData(name=u) for u in data['urls'] if u]
+        if gurls:
+            gdata.urls = SetValue(gurls)
+            log.debug("applied urls")
+
+    times_read = data.get('times_read')
+    if times_read:
+        gdata.times_read = SetValue(times_read)
+        log.debug("applied times_read")
+
+        if times_read > 0:
+            # NOTE(review): GalleryProgress is neither defined nor imported in this module,
+            # so the original call raised NameError before any metadata was applied.
+            # The lookup is kept as-is in case the host provides the name, but a missing
+            # name now only skips the progress update — confirm where it should come from.
+            try:
+                gallery_progress = GalleryProgress
+            except NameError:
+                gallery_progress = None
+
+            if gallery_progress is not None:
+                gallery_progress.update_progress(gallery.id, gallery.last_page.id)
+            else:
+                log.warning("GalleryProgress is not available, reading progress was not updated")
+
+    applied = hpx.command.UpdateItemData(gallery, gdata, options=options)
+
+    log.debug(f"applied: {applied}")
+
+    return applied
+
+@hpx.subscribe("init")
+def inited():
+ "Handler of the 'init' event: merges the plugin configuration returned by hpx.get_plugin_config() into common.plugin_config"
+ common.plugin_config.update(hpx.get_plugin_config())
+
+@hpx.subscribe('config_update')
+def config_update(cfg):
+ "Handler of the 'config_update' event: merges the passed configuration cfg into common.plugin_config"
+ common.plugin_config.update(cfg)
+
+def has_file_metadata(path):
+    """
+    Look for known metadata files directly inside path.
+
+    Only entries whose name ends with one of common.filetypes are considered,
+    names are compared case-insensitively against the keys of common.filenames.
+
+    Returns:
+        A list of ``(datatypes, filepath)`` tuples, at most one per known
+        filename; empty when no metadata file was found.
+    """
+    fs = hpx.command.CoreFS(path)
+
+    contents = {x: os.path.split(x)[1].lower() for x in fs.contents(corefs=False) if x.lower().endswith(common.filetypes)}
+    log.debug(f"Contents for {fs.path}:")
+    log.debug(f"{tuple(contents.values())}")
+
+    found_files = []
+    for fnames, dtypes in common.filenames.items():
+        # a key is a single filename, a collection of filenames is accepted too
+        wanted = (fnames,) if isinstance(fnames, str) else fnames
+        for fpath, fname in contents.items():
+            # compare whole names: `fname in "info.json"` was a substring test
+            # that also accepted names such as "o.json"
+            if fname in wanted:
+                found_files.append((dtypes, fpath))
+                break
+
+    return found_files
+
+def apply_file_metadata(gallery, found_files):
+    """
+    Extract the data of every (datatypes, filepath) entry in found_files, merge it
+    into a copy of common.common_data and apply the merged data to gallery.
+
+    Returns True when at least one file delivered data, False otherwise.
+    """
+    merged = common.common_data.copy()
+    found_any = False
+
+    for dtypes, fpath in found_files:
+        log.debug(f"path: {fpath}")
+        extracted = get_common_data(dtypes, fpath)
+        if not extracted:
+            continue
+        found_any = True
+        merged.update(extracted)
+
+    # nothing is applied when none of the files delivered data
+    if found_any:
+        apply_metadata(merged, gallery)
+
+    return found_any
+
+@hpx.attach("GalleryFS.parse_metadata_file")
+def parse(path, gallery):
+    "Looks for metadata files in path and applies their data to gallery, returns whether any data was found"
+    return apply_file_metadata(gallery, has_file_metadata(path))
+
+##### ---
+
+@hpx.attach("Metadata.info")
+def metadata_info():
+    "Describes this plugin as a metadata handler: identifier, display name, description, sites and database models"
+    supported_models = (
+        hpx.command.GetDatabaseModel("Gallery"),
+    )
+    info = hpx.command.MetadataInfo(
+        identifier="filemetadata",
+        name="File Metadata",
+        description="Extracts and applies metadata from a file accompanying a gallery",
+        sites=("eze", "E-Hentai-Downloader", "HDoujinDownloader"),
+        models=supported_models,
+    )
+    return info
+
+@hpx.attach("Metadata.query", trigger='filemetadata')
+def query(itemtuple):
+ "Looks up files for matching items"
+ mdata = []
+
+ for mitem in itemtuple:
+ item = mitem.item
+ # NOTE(review): this local is never used below and shadows the module-level 'options' dict
+ options = mitem.options
+
+ # collect the metadata files of every source of the item
+ found_files = []
+ for s in item.get_sources():
+ found_files.extend(has_file_metadata(s))
+
+ log.info(f"found {len(found_files)} metadata files for item: {item}")
+
+ if found_files:
+ log.debug(f"{found_files}")
+
+ # NOTE(review): indentation is lost in this view; the append presumably belongs inside 'if found_files:' — confirm
+ # the found files are handed over to the apply step through the 'found' key
+ mdata.append(hpx.command.MetadataData(
+ metadataitem=mitem,
+ title=item.preferred_title.name if item.preferred_title else '',
+ data={
+ 'found': found_files,
+ }))
+
+ log.info(f"Returning {len(mdata)} data items")
+ return tuple(mdata)
+
+@hpx.attach("Metadata.apply", trigger='filemetadata')
+def apply(datatuple):
+    "Applies the metadata files stored under data['found'] to each item and returns one MetadataResult per item"
+    results = []
+
+    for d in datatuple:
+        # NOTE(review): query() passes the wrapper as 'metadataitem'; d.item presumably resolves to the gallery — confirm
+        if apply_file_metadata(d.item, d.data['found']):
+            result = hpx.command.MetadataResult(data=d, status=True)
+        else:
+            result = hpx.command.MetadataResult(data=d, status=False, reason="failed to apply data from file")
+        results.append(result)
+
+    return tuple(results)
diff --git a/plugins/File Metadata/readme.md b/plugins/File Metadata/readme.md
new file mode 100644
index 0000000..a8a7244
--- /dev/null
+++ b/plugins/File Metadata/readme.md
@@ -0,0 +1,83 @@
+File Metadata
+----------------------------
+
+> This plugin extracts and applies metadata from a file accompanying a gallery folder or archive.
+
+This plugin supports extracting metadata from files produced by:
+
+- [eze](https://dnsev-h.github.io/eze/)
+ > - only supports JSON format and file must be named `info.json`
+- [HDoujin Downloader](https://doujindownloader.com/)
+ > - all file versions are supported
+ > - supports both JSON and TXT formats
+ > - file must be named `info.json` or `info.txt`
+- [E-Hentai-Downloader](https://github.com/ccloli/E-Hentai-Downloader)
+ > - supports only the file named `info.txt`
+
+## Configuration
+
+Configure this plugin by adding `file-metadata` to the `plugin.config` namespace in your `config.yaml`:
+```yaml
+plugin:
+ config:
+ file-metadata:
+ option1: True
+ option2:
+ - item 1
+ - item 2
+```
+
+#### Available options
+
+Name | Default | Description
+--- | --- | ---
+`characters_namespace` | `character` | which namespace to put the values in the CHARACTERS field into (applies to hdoujin)
+
+# Extending
+
+Follow these steps to add support for more kinds of files:
+
+1. Create a new enum member for your extractor in `extractors.common.DataType`
+2. Add a new filetype to `extractors.common.filetypes` if necessary
+3. Add your new enum member to `extractors.common.filenames`
+4. Create a new `.py` file in the `extractors` folder
+5. Import the `common` module and create a new `common.Extractor` subclass
+6. At the end of the file, register the subclass with `common.register_extractor`
+7. Import your new `.py` file in `extractors.__init__`
+
+# Changelog
+
+- `2.0.2`
+ - Improved error handling on incompatible detected files
+
+- `2.0.1`
+ - Fixed an issue where metadata files in archives would fail to be detected
+
+- `2.0.0`
+ - The plugin will also now act as a regular metadata plugin, making it possible to retrieve metadata from files on-demand
+
+- `1.0.3`
+ - Updated the eze handler to save uploaded date as published date
+
+- `1.0.2`
+ - Fixed a bug where not all metadata would be applied
+
+- `1.0.1`
+ - Updated the eze handler to support files produced by https://github.com/dnsev-h/ehentai-archive-info
+ - Fixed the extractors still using the old api
+
+- `1.0.0`
+ - Updated to reflect new changes in HPX v0.10.0
+
+- `0.3.0b`
+ - **HDoujin**: add option `characters_namespace`
+ - **HDoujin**: parse `PARODY` and `CHARACTERS` fields
+
+- `0.2.0b`
+ - require HPX `0.2.0`
+ - use new api to update gallery data
+ - add support for E-Hentai-Downloader
+ - fix bug where `info.txt` in archive files would fail to get parsed
+
+- `0.1.0b`
+ - first version
diff --git a/plugins/NHentai Downloader/hplugin.json b/plugins/NHentai Downloader/hplugin.json
new file mode 100644
index 0000000..8f59237
--- /dev/null
+++ b/plugins/NHentai Downloader/hplugin.json
@@ -0,0 +1,13 @@
+{
+ "id": "d2d70306-db03-4cc0-b9c6-b5b1f95d10fe",
+ "shortname": "nhentai-downloader",
+ "name": "NHentai Downloader",
+ "version": "1.0.1",
+ "description": "A plugin that enables downloading manga and doujinshi from nhentai.net",
+ "author": "Twiddly",
+ "update_url": "https://github.com/happypandax/plugins/tree/master/plugins/NHentai%20Downloader",
+ "website": "https://github.com/happypandax/plugins/tree/master/plugins/NHentai%20Downloader",
+ "entry": "main.py",
+ "test": "test.py",
+ "require": ["happypandax >= 0.12.0"]
+}
diff --git a/plugins/NHentai Downloader/main.py b/plugins/NHentai Downloader/main.py
new file mode 100644
index 0000000..14bc655
--- /dev/null
+++ b/plugins/NHentai Downloader/main.py
@@ -0,0 +1,152 @@
+# main.py
+import __hpx__ as hpx
+
+from bs4 import BeautifulSoup
+
+# shorthand for the download request class provided by HPX
+DownloadRequest = hpx.command.DownloadRequest
+
+log = hpx.get_logger("main")
+
+# identifier used as the trigger for the download handlers of this plugin
+IDENTIFIER = "nhentai"
+# headers passed along with the gallery page request
+HEADERS = {'user-agent':"Mozilla/5.0 (Windows NT 6.3; rv:36.0) Gecko/20100101 Firefox/36.0"}
+# delay registered for nhentai gallery urls when the user has not set one
+DEFAULT_DELAY = 1.5
+
+def website_url_regex_gen(domain, path_regex=None, variable_port=False, variable_tld=False, trailing_slash=True, end=True, trailing_fragment=True):
+ """
+ Generates a regex suitable for a specific domain
+
+ The pattern always starts with an optional http/https scheme and an optional 'www.' prefix,
+ followed by `domain` in a capture group. `domain` is inserted as-is, so it is treated as regex.
+
+ domain: the domain to match
+ path_regex: regex for the path, appended after the domain part when given
+ variable_port: allow an optional port of 1 to 5 digits
+ variable_tld: append a pattern for a tld of 2 to 5 lowercase letters
+ trailing_slash: allow an optional slash
+ end: anchor the pattern at the end of the string
+ trailing_fragment: allow an optional '#fragment'
+
+ Returns the regex as a string
+ """
+ # optional scheme + optional www. + domain
+ rgx = r"^(http\:\/\/|https\:\/\/)?(www\.)?({})".format(domain)
+ if variable_tld:
+ rgx += r"\.[a-z]{2,5}"
+ if variable_port:
+ rgx += r"(:[0-9]{1,5})?"
+ if trailing_slash:
+ rgx += r"\/?"
+ if path_regex:
+ rgx += path_regex
+ # NOTE(review): presumably this second optional slash belongs to the path part above - confirm nesting
+ if trailing_slash:
+ rgx += r"\/?"
+ if trailing_fragment:
+ rgx += r"(#\S+)?"
+ if end:
+ rgx += "$"
+ return rgx
+
+@hpx.subscribe("init")
+def inited():
+ "Registers a default request delay for nhentai gallery urls if the user has not set one"
+ # set default delay if not set
+ delays = hpx.get_setting("network", "delays", {})
+ delay_url = "https://nhentai.net/g/"
+ if delay_url not in delays:
+ log.info(f"Setting delay on {delay_url} requests to {DEFAULT_DELAY}")
+ delays[delay_url] = DEFAULT_DELAY
+ # write the delays back to the 'network' settings
+ hpx.update_setting("network", "delays", delays)
+
+@hpx.attach("Download.info")
+def download_info():
+    "Describes this plugin as a download handler for nhentai gallery urls"
+    # matches urls like https://nhentai.net/g/<gallery id>
+    gallery_url_pattern = website_url_regex_gen(
+        "nhentai.net",
+        path_regex=r"g\/[0-9]{3,10}",
+        trailing_slash=True,
+        variable_tld=False,
+        trailing_fragment=True,
+        end=True,
+    )
+    return hpx.command.DownloadInfo(
+        identifier=IDENTIFIER,
+        name="NHentai",
+        parser=gallery_url_pattern,
+        sites=("https://nhentai.net",),
+        description="Download manga and doujinshi from nhentai.net",
+    )
+
+@hpx.attach("Download.query", trigger=IDENTIFIER)
+def download_query(item):
+    """
+    Called to query for resource URLs that should be downloaded.
+    Note that HPX will handle the actual downloading part.
+    The attached handler should just return all the URLs that should be downloaded in the form of :class:`DownloadRequest` objects
+
+    The gallery page at `item.url` is fetched and parsed. The gallery name found on the page is stored on `item.name`,
+    the cover is requested as a thumbnail and every page thumbnail is translated into the url of the full image.
+    Missing page elements are logged and skipped instead of raising.
+
+    should return:
+        a tuple of :class:`DownloadRequest` for all the URL resources that should be downloaded.
+        Note that the download system is recursive, so if the URL resource matches a download handler (the same or a different one),
+        That handler will be called upon with a new :class:`DownloadItem` for that particular URL
+        (though only once, meaning, no handler will be called upon again with the exact same URL during a single session)
+    """
+    # prepare request
+    req_props = hpx.command.RequestProperties(
+        headers=HEADERS,
+    )
+    req = hpx.command.SingleGETRequest().request(item.url, req_props)
+
+    log.info(f"querying url: {item.url}")
+
+    download_requests = []
+
+    if not req.ok:
+        log.warning("request failed")
+        return tuple(download_requests)
+
+    log.info("request was successful")
+    # parse html page
+    soup = BeautifulSoup(req.text, "html.parser")
+
+    # get gallery information
+    log.info("parsing gallery info")
+    info_div = soup.find("div", id="info")
+    if info_div:
+        title_el = soup.find("h1", class_="title")
+        if title_el:
+            title_name = soup.find("span", class_="pretty")
+            if title_name:
+                item.name = str(title_name.string)
+                log.info(f"found name of gallery: {item.name}")
+    else:
+        log.warning("couldn't find gallery info div")
+
+    # get gallery cover url
+    cover_div = soup.find("div", id="cover")
+    cover_img = cover_div.find("img") if cover_div else None
+    if cover_img:
+        # img is lazy loaded so the url lives in data-src, which may be absent
+        cover_src = cover_img.get('data-src')
+        if cover_src:
+            download_requests.append(DownloadRequest(downloaditem=item, url=cover_src, is_thumbnail=True))
+        else:
+            log.warning("failed to get cover src")
+
+    # get gallery page urls
+    thumbs_div = soup.find("div", id="thumbnail-container")
+    # the container can be missing (e.g. site layout changed), treat that as "no links"
+    all_links = thumbs_div.findAll("a") if thumbs_div else []
+    if all_links:
+        log.info(f"found {len(all_links)} thumbnail links")
+        for l in all_links:
+            # collect the urls to the page images
+            # nhentai has a simple url system where thumbs are stored at
+            # https://t.nhentai.net/galleries/1498842/2t.jpg
+            # and the real image at
+            # https://i.nhentai.net/galleries/1498842/2.jpg
+            thumb_src = l.img.get('data-src') if l.img else None # img is lazy loaded so src isn't available
+            url_parts = thumb_src.split('/') if thumb_src else []
+            if len(url_parts) < 2:
+                # no usable src, or the src has no gallery id segment
+                log.warning("failed to get thumbnail src")
+                continue
+            img_id = url_parts[-2]
+            thumb_number = url_parts[-1]
+            img_number = thumb_number.replace('t', '')
+            # construct url for real image
+            img_url = "https://i.nhentai.net/galleries/{}/{}".format(img_id, img_number)
+            log.debug(f"final image url parsed to be: {img_url}")
+            # finally add the url to the list of requests for HPX downloader to take care of the rest
+            download_requests.append(DownloadRequest(downloaditem=item, url=img_url))
+    else:
+        log.warning("couldn't find any thumbnail links")
+
+    if download_requests:
+        log.info(f"was able to prepare requests for {len(download_requests)} images")
+    return tuple(download_requests)
+
+@hpx.attach("Download.done", trigger=IDENTIFIER)
+def download_done(result):
+    """
+    Called once every :class:`DownloadRequest` of a :class:`DownloadItem` has finished downloading.
+    This is the place for post-processing (archiving files, renaming files or folders, deleting extraneous files and so on).
+    If post-processing fails, set the `status` property of the :class:`DownloadResult` to `False`.
+    The handler should *not* import the file into HPX (if it's an item), HPX takes care of that part
+
+    should return:
+        the :class:`DownloadResult` that was passed in, optionally with modified `path`, `status` and `reason` properties
+    """
+    # nhentai downloads need no post-processing, the result is handed back untouched
+    log.info(f"download of images was successful for {result.downloaditem.name}")
+    return result
diff --git a/plugins/NHentai Downloader/readme.md b/plugins/NHentai Downloader/readme.md
new file mode 100644
index 0000000..f2b1790
--- /dev/null
+++ b/plugins/NHentai Downloader/readme.md
@@ -0,0 +1,20 @@
+NHentai Downloader
+----------------------------
+
+> A plugin that enables downloading manga and doujinshi from nhentai.net
+
+## Configuration
+
+There are no configuration options available for this plugin
+
+## Things yet to be implemented
+
+- Torrents (waiting for HPX to support this)
+
+# Changelog
+
+- `1.0.1`
+ - fixed an issue where galleries would fail to download because the extracted title was invalid
+
+- `1.0.0`
+ - first version
\ No newline at end of file
diff --git a/plugins/NHentai Downloader/test.py b/plugins/NHentai Downloader/test.py
new file mode 100644
index 0000000..36a317e
--- /dev/null
+++ b/plugins/NHentai Downloader/test.py
@@ -0,0 +1 @@
+# test.py
\ No newline at end of file
diff --git a/plugins/NHentai Metadata/hplugin.json b/plugins/NHentai Metadata/hplugin.json
new file mode 100644
index 0000000..78e3ea3
--- /dev/null
+++ b/plugins/NHentai Metadata/hplugin.json
@@ -0,0 +1,15 @@
+{
+ "id": "7d68901f-8cef-4f3c-82b1-6e93f63ba00c",
+ "shortname": "nhentai-metadata",
+ "name": "NHentai Metadata",
+ "version": "1.0.1",
+ "description": "A plugin that can fetch metadata from nhentai.net",
+ "author": "Twiddly",
+ "update_url": "https://github.com/happypandax/plugins/tree/master/plugins/NHentai%20Metadata",
+ "website": "https://github.com/happypandax/plugins/tree/master/plugins/NHentai%20Metadata",
+ "entry": "main.py",
+ "test": "test.py",
+ "require": [
+ "happypandax >= 0.10.0"
+ ]
+}
\ No newline at end of file
diff --git a/plugins/NHentai Metadata/main.py b/plugins/NHentai Metadata/main.py
new file mode 100644
index 0000000..74039da
--- /dev/null
+++ b/plugins/NHentai Metadata/main.py
@@ -0,0 +1,497 @@
+# main.py
+import __hpx__ as hpx
+import regex
+import arrow
+import datetime
+import os
+import urllib
+import html
+
+from bs4 import BeautifulSoup
+from PIL import Image, ImageChops
+
+log = hpx.get_logger("main")
+
+# building blocks for the gallery url regex returned by metadata_info
+MATCH_URL_PREFIX = r"^(http\:\/\/|https\:\/\/)?(www\.)?" # http:// or https:// + www.
+MATCH_URL_END = r"\/?$"
+
+# delay registered for nhentai urls when the user has not set one
+DEFAULT_DELAY = 1.5
+
+# identifier used as the trigger for the metadata handlers of this plugin
+IDENTIFIER = "nhentai"
+
+URLS = {
+ 'nh': 'https://nhentai.net',
+ 'title_search': "https://nhentai.net/search/?q={title}"
+}
+
+# headers passed along with every request made by this plugin
+HEADERS = {'user-agent':"Mozilla/5.0 (Windows NT 6.3; rv:36.0) Gecko/20100101 Firefox/36.0"}
+
+# default options, overridden by the user's plugin config on init and on config updates
+PLUGIN_CONFIG = {
+ 'filename_search': False, # use the filename/folder-name for searching instead of gallery title
+ 'remove_namespaces': True, # remove superfluous namespaces like 'artists', 'languages' and 'groups' because they are handled specially in HPX
+ 'gallery_results_limit': 10, # maximum amount of galleries to return
+ 'blacklist_tags': [], # tags to ignore when updating tags
+ 'add_gallery_url': True, # add nhentai url to gallery
+ 'preferred_language': "english", # preferred gallery language (in gallery title) to extract from if multiple galleries were found, set empty string for default
+ 'search_query': "{title}", # the search query, '{title}' will be replaced with the gallery title, use double curly brackets to escape a bracket
+}
+
+@hpx.subscribe("init")
+def inited():
+ "Loads the user's plugin config and registers a default request delay for nhentai if none is set"
+ # user supplied options take precedence over the defaults in PLUGIN_CONFIG
+ PLUGIN_CONFIG.update(hpx.get_plugin_config())
+
+ # set default delay values if not set
+ delays = hpx.get_setting("network", "delays", {})
+ for u in (URLS['nh'],):
+ if u not in delays:
+ log.info(f"Setting delay on {u} requests to {DEFAULT_DELAY}")
+ delays[u] = DEFAULT_DELAY
+ # write the delays back to the 'network' settings
+ hpx.update_setting("network", "delays", delays)
+
+@hpx.subscribe('config_update')
+def config_update(cfg):
+    "Merges the changed plugin configuration `cfg` into PLUGIN_CONFIG"
+    PLUGIN_CONFIG.update(cfg)
+
+@hpx.attach("Metadata.info")
+def metadata_info():
+    "Describes this plugin as a metadata handler for nhentai gallery urls operating on the Gallery model"
+    # matches urls like https://nhentai.net/g/<gallery id>
+    gallery_url_pattern = MATCH_URL_PREFIX + r"(nhentai\.net\/g\/[0-9]{3,10})" + MATCH_URL_END
+    gallery_model = hpx.command.GetDatabaseModel("Gallery")
+    return hpx.command.MetadataInfo(
+        identifier=IDENTIFIER,
+        name="nhentai",
+        parser=gallery_url_pattern,
+        sites=("https://nhentai.net",),
+        description="Fetch metadata from nhentai.net",
+        models=(gallery_model,),
+    )
+
+@hpx.attach("Metadata.query", trigger=IDENTIFIER)
+def query(itemtuple):
+ """
+ Called to query for candidates to extract metadata from.
+ Note that HPX will handle choosing which candidates to extract data from.
+ The attached handler should just return all the candidates found.
+
+ For every item either the provided url is used or nhentai is searched by title
+ (or by file/folder name when the 'filename_search' option is set).
+ Returns a tuple of `MetadataData`, one for each candidate url a gallery id could be extracted from.
+ """
+ log.info("Querying nhentai for metadata")
+ mdata = []
+ for mitem in itemtuple:
+ item = mitem.item
+ url = mitem.url
+ gurls = [] # tuple of (title, url)
+ # url was provided
+ if url:
+ log.info(f"url provided: {url} for {item}")
+ # no title is known for a provided url, so the url doubles as the title
+ gurls.append((url, url))
+ else: # manually search for id
+ log.info(f"url not provided for {item}")
+ # search with title
+ i_title = ""
+ if PLUGIN_CONFIG.get("filename_search"):
+ sources = item.get_sources()
+ if sources:
+ # get folder/file name
+ i_title = os.path.split(sources[0])[1]
+ # remove ext
+ i_title = os.path.splitext(i_title)[0]
+ else:
+ if item.titles:
+ i_title = item.titles[0].name # make user choice?
+ if i_title:
+ gurls = title_search(i_title)
+
+ log.info(f"found {len(gurls)} urls for item: {item}")
+
+ # list is sorted by date added so we reverse it
+ gurls.reverse()
+
+ log.debug(f"{gurls}")
+ # move candidates whose title mentions the preferred language to the front
+ final_gurls = []
+ pref_lang = PLUGIN_CONFIG.get('preferred_language')
+ if pref_lang:
+ for t in gurls:
+ if pref_lang.lower() in t[0].lower():
+ final_gurls.insert(0, t)
+ continue
+ final_gurls.append(t)
+ else:
+ final_gurls = gurls
+
+ # only candidates with an extractable gallery id are returned
+ for t, u in final_gurls:
+ g_id = parse_url(u)
+ if g_id:
+ mdata.append(hpx.command.MetadataData(
+ metadataitem = mitem,
+ title=t,
+ url=u,
+ data={
+ 'id': g_id,
+ 'gallery_url': u,
+ }))
+ return tuple(mdata)
+
+@hpx.attach("Metadata.apply", trigger=IDENTIFIER)
+def apply(datatuple):
+    """
+    Called to fetch and apply metadata to the given data items.
+    Remember to set the `status` property on the :class:`MetadataResult` object to `True` on a successful fetch.
+
+    The gallery page stored under the 'gallery_url' key of every data item is fetched, parsed with
+    `format_metadata` and applied with `apply_metadata`.
+
+    Returns a tuple with one :class:`MetadataResult` per data item, also for items whose request failed.
+    """
+    log.info("Applying metadata from nhentai")
+    mresult = []
+
+    for mdata in datatuple:
+        applied = False
+        # prepare request
+        req_props = hpx.command.RequestProperties(
+            headers=HEADERS,
+        )
+
+        gallery_url = mdata.data['gallery_url']
+
+        r = hpx.command.SingleGETRequest().request(gallery_url, req_props)
+        if r.ok:
+            response = r.text
+            if response and '404 – Not Found' not in response:
+                filtered_data = format_metadata(response, mdata.item, apply_url=PLUGIN_CONFIG.get('add_gallery_url', True), gallery_url=gallery_url)
+                applied = apply_metadata(filtered_data, mdata.item, mdata.options)
+            elif response:
+                log.debug(response)
+            mresult.append(hpx.command.MetadataResult(data=mdata, status=applied))
+            log.info(f"Applied: {applied}")
+        else:
+            log.warning(f"Request returned bad status: {r.status_code}")
+            # report the failure instead of silently dropping the item from the results
+            mresult.append(hpx.command.MetadataResult(
+                data=mdata,
+                status=False,
+                reason=f"request returned bad status: {r.status_code}",
+            ))
+    return tuple(mresult)
+
+def title_search(title, _times=0):
+    """
+    Searches on nhentai for galleries with given title, returns a list of (title, matching gallery urls)
+
+    The search query is built from the 'search_query' option; when that template can't be formatted
+    the plain title is used instead. If nothing is found, the search is retried once with everything
+    in parentheses and square brackets stripped from the title.
+
+    title: the title to search for
+    _times: internal retry counter, callers should leave it at its default
+    """
+    search_url = URLS['title_search']
+    log.debug(f"searching with title: {title}")
+
+    sq = PLUGIN_CONFIG.get("search_query")
+    try:
+        sq = sq.format(title=title)
+    except (AttributeError, IndexError, KeyError, TypeError, ValueError):
+        # the template comes from user config: it may be missing or contain invalid/unknown fields
+        log.warning("Failed to use customized search query")
+        sq = title
+
+    log.info(f"Final search query: {sq}")
+
+    f_url = search_url.format(
+        title=urllib.parse.quote_plus(sq)
+    )
+
+    log.debug(f"final url: {f_url}")
+
+    r = page_results(f_url)
+
+    if not r and not _times:
+        # retry once without the (...) and [...] parts of the title
+        stripped_title = regex.sub(r"\(.+?\)|\[.+?\]", "", title)
+        stripped_title = " ".join(stripped_title.split())
+        # searching again only makes sense if stripping left a different, non-empty title
+        if stripped_title and stripped_title != title:
+            r = title_search(stripped_title, _times=_times+1)
+    return r
+
+def page_results(page_url, limit=None):
+ """
+ Opens nhentai page, parses for results, and then returns list of (title, url)
+
+ page_url: url of the page to open
+ limit: maximum amount of results to parse, defaults to the 'gallery_results_limit' option
+
+ `raise_for_status` is called on the response before the page is parsed.
+ """
+ found_urls = [] # title, url
+ if limit is None:
+ limit = PLUGIN_CONFIG.get("gallery_results_limit")
+
+ # prepare request
+ req_props = hpx.command.RequestProperties(
+ headers=HEADERS,
+ )
+ r = hpx.command.SingleGETRequest().request(page_url, req_props)
+ r.raise_for_status()
+ soup = BeautifulSoup(r.text, "html.parser")
+ results = soup.findAll("div", class_="gallery", limit=limit)
+ for x in results:
+ # str(x.a.string)
+ t = ""
+ cap = x.find("div", class_="caption")
+ if cap:
+ # NOTE(review): presumably the caption holds a single text node; otherwise `.string` would be None - verify
+ t = str(cap.string)
+ # the href of the first link is appended to the site root
+ u = URLS['nh'] + x.a['href']
+ # the url doubles as the title when no caption text was found
+ found_urls.append((t or u, u))
+
+ if not found_urls:
+ log.warning(f"No results found on url: {page_url}")
+ log.debug(f"HTML: {r.text}")
+ return found_urls
+
+def parse_url(url):
+    "Returns the gallery id found after 'g/' in `url`, or None (with a warning logged) when there is none"
+    id_match = regex.search('(?<=g/)([0-9]+)', url)
+    if not id_match:
+        log.warning("Error extracting gallery id from url: {}".format(url))
+        return None
+    return id_match.group()
+
+
+def capitalize_text(text):
+    """
+    Capitalizes every whitespace separated word of `text` and joins the words with single spaces
+    """
+    words = text.strip().split()
+    capitalized_words = [word.capitalize() for word in words]
+    return " ".join(capitalized_words)
+
+def format_metadata(text, item, apply_url=False, gallery_url=None):
+    """
+    Parses the HTML of an nhentai gallery page and formats the metadata to look like this for apply_metadata:
+    data = {
+        'titles': None, # [(title, language),...]
+        'artists': None, # [(artist, (circle, circle, ..)),...]
+        'parodies': None, # [parody, ...]
+        'category': None,
+        'tags': None, # [tag, tag, tag, ..] or {ns:[tag, tag, tag, ...]}
+        'pub_date': None, # DateTime object or Arrow object
+        'language': None,
+        'urls': None # [url, ...]
+        }
+    Only the keys that could be extracted are present in the returned dict.
+
+    text: HTML source of the gallery page
+    item: the item the metadata is meant for (not used while parsing)
+    apply_url: put `gallery_url` under the 'urls' key
+    gallery_url: url of the gallery page
+    """
+    mdata = {}
+
+    soup = BeautifulSoup(text, "html.parser")
+    info_div = soup.find("div", id="info")
+    if info_div:
+
+        mdata['titles'] = []
+
+        parsed_text = None
+        eng_title = info_div.find("h1")
+        if eng_title:
+            eng_title = str(eng_title.text)
+            parsed_text = hpx.command.ItemTextParser(eng_title)
+
+            parsed_title = parsed_text.extract_title()
+            if parsed_title:
+                parsed_title = parsed_title[0]
+
+            mdata['titles'].append((parsed_title or eng_title, 'english'))
+
+        jp_title = info_div.find("h2")
+        if jp_title:
+            mdata['titles'].append((str(jp_title.text), 'japanese'))
+
+        parsed_artists = parsed_text.extract_artist() if parsed_text else []
+        parsed_circles = parsed_text.extract_circle() if parsed_text else []
+
+        artists = set()
+        circles = set()
+        parodies = set()
+
+        lang = "japanese" # default language
+
+        # the option may be unset (None) in a user config, treat that as an empty blacklist
+        blacklist_tags = {x.lower() for x in (PLUGIN_CONFIG.get("blacklist_tags") or ())}
+
+        def is_blacklisted(namespace, tag):
+            "Checks whether 'namespace:tag' is on the blacklist, ignoring case"
+            return f"{namespace}:{tag}".lower() in blacklist_tags
+
+        def preferred_spelling(tag, candidates):
+            "Returns the candidate that equals `tag` ignoring case, or `tag` itself when there is none"
+            for candidate in candidates:
+                if candidate.lower() == tag.lower():
+                    return candidate
+            return tag
+
+        tags_containers = info_div.find("section", id="tags")
+        if tags_containers:
+            extranous_namespaces = ("artists", "categories", "parodies", "groups", "languages")
+            for tag_container in tags_containers.findAll("div", class_="tag-container"):
+                container_strings = list(tag_container.stripped_strings)
+                if not container_strings or not container_strings[0]:
+                    continue
+                ns = container_strings[0]
+                if ns.endswith(':'):
+                    ns = ns[:-1] # remove colon
+                ns = ns.lower()
+
+                # first string of every tag link is the tag name, links without any text are skipped
+                tags = []
+                for tag_link in tag_container.findAll("a", class_="tag"):
+                    link_strings = list(tag_link.stripped_strings)
+                    if link_strings:
+                        tags.append(link_strings[0])
+
+                if ns == "artists":
+                    for t in tags:
+                        if is_blacklisted(ns, t):
+                            continue
+                        # the artist extracted from the title likely has better capitalization, so choose that instead
+                        artists.add(preferred_spelling(t, parsed_artists))
+                elif ns == "groups":
+                    for t in tags:
+                        if is_blacklisted(ns, t):
+                            continue
+                        # the circle extracted from the title likely has better capitalization, so choose that instead
+                        circles.add(preferred_spelling(t, parsed_circles))
+                elif ns == "parodies":
+                    parodies.update(t for t in tags if not is_blacklisted(ns, t))
+                elif ns == "categories":
+                    # only supports one
+                    if tags and not is_blacklisted(ns, tags[0]):
+                        mdata['category'] = capitalize_text(tags[0])
+                elif ns == "languages":
+                    for t in tags:
+                        if is_blacklisted(ns, t):
+                            continue
+                        # 'translated' is a marker, not a language
+                        if t == 'translated':
+                            continue
+                        lang = t # only supports one
+
+                if PLUGIN_CONFIG.get("remove_namespaces") and ns in extranous_namespaces:
+                    if ns == 'languages': # keep other tags
+                        tags = [x for x in tags if x != lang]
+                    else:
+                        continue
+
+                # add rest as tags
+                if tags:
+                    ns_tags = mdata.setdefault('tags', {})
+                    # tags in the plain 'tags' namespace are stored without a namespace
+                    tag_key = None if ns == 'tags' else ns
+                    for t in tags:
+                        if is_blacklisted(ns, t):
+                            continue
+                        ns_tags.setdefault(tag_key, []).append(t)
+
+        mdata['language'] = lang
+
+        # fall back on what was parsed from the title when the page listed none
+        # (set.update mutates in place; set.union would only return a new set)
+        if not artists:
+            artists.update(a for a in parsed_artists if not is_blacklisted("artists", a))
+        if not circles:
+            circles.update(c for c in parsed_circles if not is_blacklisted("groups", c))
+
+        if parodies:
+            mdata['parodies'] = parodies
+
+        if artists:
+            # assign circles to each artist
+            mdata['artists'] = [(a, tuple(circles)) for a in artists]
+
+    if apply_url:
+        mdata['urls'] = [gallery_url]
+
+    log.debug(f"formatted data: {mdata}")
+
+    return mdata
+
+# shorthands for the HPX data classes used by apply_metadata
+GalleryData = hpx.command.GalleryData
+LanguageData = hpx.command.LanguageData
+TitleData = hpx.command.TitleData
+ArtistData = hpx.command.ArtistData
+ArtistNameData = hpx.command.ArtistNameData
+ParodyData = hpx.command.ParodyData
+ParodyNameData = hpx.command.ParodyNameData
+CircleData = hpx.command.CircleData
+CategoryData = hpx.command.CategoryData
+UrlData = hpx.command.UrlData
+NamespaceTagData= hpx.command.NamespaceTagData
+TagData= hpx.command.TagData
+NamespaceData = hpx.command.NamespaceData
+
+def apply_metadata(data, gallery, options):
+ """
+ Builds a GalleryData object from `data` and applies it to `gallery` through `hpx.command.UpdateItemData`.
+
+ data = {
+ 'titles': None, # [(title, language),...]
+ 'artists': None, # [(artist, (circle, circle, ..)),...]
+ 'parodies': None, # [parody, ...]
+ 'category': None,
+ 'tags': None, # [tag, tag, tag, ..] or {ns:[tag, tag, tag, ...]}
+ 'pub_date': None, # DateTime object or Arrow object
+ 'language': None,
+ 'urls': None # [url, ...]
+ }
+ Keys that are missing or don't hold a value of the expected type are skipped.
+
+ gallery: the item to update
+ options: passed on to `UpdateItemData`
+
+ Returns whatever `UpdateItemData` returned.
+ """
+
+ log.debug(f"data: {data}")
+
+ gdata = GalleryData()
+
+ if isinstance(data.get('titles'), (list, tuple, set)):
+ gtitles = []
+ for t, l in data['titles']:
+ gtitle = None
+ if t:
+ # titles scraped from HTML may still contain entities
+ t = html.unescape(t)
+ gtitle = TitleData(name=t)
+ if t and l:
+ gtitle.language = LanguageData(name=l)
+ if gtitle:
+ gtitles.append(gtitle)
+
+ if gtitles:
+ gdata.titles = gtitles
+ log.debug("applied titles")
+
+ if isinstance(data.get('artists'), (list, tuple, set)):
+ gartists = []
+ for a, c in data['artists']:
+ if a:
+ gartist = ArtistData(names=[ArtistNameData(name=capitalize_text(a))])
+ gartists.append(gartist)
+
+ # NOTE(review): relies on `gartist` assigned under `if a:` above - confirm this is only reached when `a` is truthy
+ if c:
+ gcircles = []
+ for circlename in [x for x in c if x]:
+ gcircles.append(CircleData(name=capitalize_text(circlename)))
+ gartist.circles = gcircles
+
+ if gartists:
+ gdata.artists = gartists
+ log.debug("applied artists")
+
+ if isinstance(data.get('parodies'), (list, tuple, set)):
+ gparodies = []
+ for p in data['parodies']:
+ if p:
+ gparody = ParodyData(names=[ParodyNameData(name=capitalize_text(p))])
+ gparodies.append(gparody)
+
+ if gparodies:
+ gdata.parodies = gparodies
+ log.debug("applied parodies")
+
+ if data.get('category'):
+ gdata.category = CategoryData(name=data['category'])
+ log.debug("applied category")
+
+ if data.get('language'):
+ gdata.language = LanguageData(name=data['language'])
+ log.debug("applied language")
+
+ if isinstance(data.get('tags'), (dict, list)):
+ # a plain list of tags is treated as tags without a namespace
+ if isinstance(data['tags'], list):
+ data['tags'] = {None: data['tags']}
+ gnstags = []
+ for ns, tags in data['tags'].items():
+ if ns is not None:
+ ns = ns.strip()
+ for t in tags:
+ t = t.strip()
+ if t:
+ kw = {'tag': TagData(name=t)}
+ # the namespace is only set when there is a non-empty one
+ if ns:
+ kw['namespace'] = NamespaceData(name=ns)
+ gnstags.append(NamespaceTagData(**kw))
+
+ if gnstags:
+ gdata.tags = gnstags
+ log.debug("applied tags")
+
+ if isinstance(data.get('pub_date'), (datetime.datetime, arrow.Arrow)):
+ pub_date = data['pub_date']
+ gdata.pub_date = pub_date
+ log.debug("applied pub_date")
+
+ if isinstance(data.get('urls'), (list, tuple)):
+ gurls = []
+ for u in data['urls']:
+ if u:
+ gurls.append(UrlData(name=u))
+ if gurls:
+ gdata.urls = gurls
+ log.debug("applied urls")
+
+ applied = hpx.command.UpdateItemData(gallery, gdata, options=options)
+
+ log.debug(f"applied: {applied}")
+
+ return applied
\ No newline at end of file
diff --git a/plugins/NHentai Metadata/readme.md b/plugins/NHentai Metadata/readme.md
new file mode 100644
index 0000000..fdf7f71
--- /dev/null
+++ b/plugins/NHentai Metadata/readme.md
@@ -0,0 +1,38 @@
+NHentai Metadata
+----------------------------
+
+> This plugin fetches metadata from nhentai.net
+
+## Configuration
+
+Configure this plugin by adding `nhentai-metadata` to the `plugin.config` namespace in your `config.yaml`:
+```yaml
+plugin:
+ config:
+ nhentai-metadata:
+ option1: True
+ option2:
+ - item 1
+ - item 2
+```
+
+#### Available options
+
+Name | Default | Description
+--- | --- | ---
+`filename_search` | `false` | use the filename/folder-name for searching instead of gallery title
+`remove_namespaces` | `true` | remove superfluous namespaces like 'artists', 'languages' and 'groups' and so on because they are handled specially in HPX
+`gallery_results_limit` | `10` | maximum amount of galleries to return
+`blacklist_tags` | `[]` | tags to ignore when updating tags, a list of `namespace:tag` strings
+`add_gallery_url` | `true` | add nhentai url to gallery
+`preferred_language` | `"english"` | preferred gallery language (in gallery title) to extract from if multiple galleries were found, set empty string for default
+`search_query` | `"{title}"` | the search query, '{title}' will be replaced with the gallery title, use double curly brackets to escape a curly bracket. Tip: if you want to only allow english results, you should modify this to "{title} language:english"
+
+
+# Changelog
+
+- `1.0.1`
+ - updated to reflect site changes where titles were not getting extracted
+
+- `1.0.0`
+ - first version
\ No newline at end of file
diff --git a/plugins/NHentai Metadata/test.py b/plugins/NHentai Metadata/test.py
new file mode 100644
index 0000000..36a317e
--- /dev/null
+++ b/plugins/NHentai Metadata/test.py
@@ -0,0 +1 @@
+# test.py
\ No newline at end of file
diff --git a/version/File Metadata/common.py b/version/File Metadata/common.py
deleted file mode 100644
index 4a065c9..0000000
--- a/version/File Metadata/common.py
+++ /dev/null
@@ -1,69 +0,0 @@
-import __hpx__ as hpx
-import enum
-import json
-
-log = hpx.get_logger(__name__)
-
-class DataType(enum.Enum):
- eze = 1
- hdoujin = 2
-
-filetypes = ('.json', '.txt')
-filenames = {
- "info.json": (DataType.eze, DataType.hdoujin),
- "info.txt": (DataType.hdoujin,)
- }
-
-common_data = {
- 'titles': None, # [(title, language),...]
- 'artists': None, # [(artist, (circle, circle, ..)),...]
- 'category': None,
- 'tags': None, # [tag, tag, tag, ..] or {ns:[tag, tag, tag, ...]}
- 'pub_date': None, # DateTime object
- 'language': None,
- 'urls': None # [url, ...]
-}
-
-extractors = {}
-
-def capitalize_text(text):
- """
- better str.capitalize
- """
- return " ".join(x.capitalize() for x in text.strip().split())
-
-def register_extractor(cls, type):
- assert issubclass(cls, Extractor)
- assert isinstance(type, DataType)
- extractors[type] = cls()
-
-class Extractor:
- """
- """
-
- def file_to_dict(self, fs: hpx.command.CoreFS) -> dict:
- """
- """
- d = {}
- log.debug(f"File ext: {fs.ext}")
- kw = {}
- if not fs.inside_archive:
- kw['encoding'] = 'utf-8'
- if fs.ext.lower() == '.json':
- with fs.open("r", **kw) as f:
- d = json.load(f)
- elif fs.ext.lower() == '.txt':
- with fs.open("r", **kw) as f:
- for line in f.readlines():
- l = line.strip()
- k, v = l.split(':', 1)
- if k.strip():
- d[k.strip()] = v.strip()
- else:
- raise NotImplementedError(f"{fs.ext} filetype not supported yet")
- return d
-
- def extract(self, filedata: dict) -> dict:
- """
- """
- raise NotImplementedError
\ No newline at end of file
diff --git a/version/File Metadata/hplugin.json b/version/File Metadata/hplugin.json
deleted file mode 100644
index 3d9b165..0000000
--- a/version/File Metadata/hplugin.json
+++ /dev/null
@@ -1,13 +0,0 @@
-{
- "id": "e38e24e4-8ca8-420e-b52b-c75510097653",
- "shortname": "file-metadata",
- "name": "File Metadata",
- "version": "0.1.0b",
- "description": "Extracts and applies metadata from a file accompanying a gallery. Supports files produced from eze and hdoujin",
- "author": "Twiddly",
- "website": "https://github.com/happypandax/plugins",
- "entry": "main.py",
- "require": [
- "happypandax >= 0.1.2"
- ]
-}
\ No newline at end of file
diff --git a/version/File Metadata/main.py b/version/File Metadata/main.py
deleted file mode 100644
index e20214b..0000000
--- a/version/File Metadata/main.py
+++ /dev/null
@@ -1,127 +0,0 @@
-import __hpx__ as hpx
-import os
-import arrow
-import datetime
-import common
-import extractors
-
-log = hpx.get_logger(__name__)
-
-options = {
-}
-
-def get_common_data(datatypes, fpath):
- assert isinstance(datatypes, tuple)
- d = {}
- fpath = hpx.command.CoreFS(fpath)
-
- for datatype in datatypes:
- md = {}
-
- ex = common.extractors.get(datatype, None)
- if ex:
- fdata = ex.file_to_dict(fpath)
- if fdata:
- md.update(ex.extract(fdata))
- if md:
- d.update(md)
- break
- return d
-
-language_model = hpx.command.GetModelClass("Language")
-title_model = hpx.command.GetModelClass("Title")
-artist_model = hpx.command.GetModelClass("Artist")
-circle_model = hpx.command.GetModelClass("Circle")
-category_model = hpx.command.GetModelClass("Category")
-artistname_model = hpx.command.GetModelClass("ArtistName")
-url_model = hpx.command.GetModelClass("Url")
-namespacetags_model = hpx.command.GetModelClass("NamespaceTags")
-
-def apply_metadata(data, gallery):
- applied = False
-
- log.debug("data:")
- log.debug(f"{data}")
-
- if isinstance(data['titles'], (list, tuple)):
- for t, l in data['titles']:
- if t:
- gtitle = title_model(name=t)
- gallery.titles.append(gtitle)
- if t and l:
- gtitle.language = language_model.as_unique(name=l)
- applied = True
-
- if isinstance(data['artists'], (list, tuple)):
- for a, c in data['artists']:
- if a:
- gartist = artist_model.as_unique(name=a)
- if not gartist in gallery.artists:
- gallery.artists.append(gartist)
- if a and c:
- for circlename in [x for x in c if x]:
- gcircle = circle_model.as_unique(name=circlename)
- if not gcircle in gartist.circles:
- gartist.circles.append(gcircle)
- applied = True
-
- if data['category']:
- gcat = category_model.as_unique(name=data['category'])
- gallery.category = gcat
- applied = True
-
- if data['language']:
- glang = language_model.as_unique(name=data['language'])
- gallery.language = glang
- applied = True
-
- if isinstance(data['tags'], (dict, list)):
- if isinstance(data['tags'], list):
- data['tags'] = {None: data['tags']}
- ns_tags = []
- for ns, tags in data['tags'].items():
- if ns is not None:
- ns = ns.strip()
- if ns and ns.lower() == 'misc':
- ns = None
- for t in tags:
- t = t.strip()
- ns_tags.append(namespacetags_model.as_unique(ns=ns, tag=t))
-
- for nstag in ns_tags:
- if not nstag in gallery.tags:
- gallery.tags.append(nstag)
- applied = True
-
- if isinstance(data['pub_date'], (datetime.datetime, arrow.Arrow)):
- pub_date = data['pub_date']
- if isinstance(pub_date, datetime.datetime):
- pub_date = arrow.Arrow.fromdatetime(pub_date)
- gallery.pub_date = pub_date
- applied = True
-
- if isinstance(data['urls'], (list, tuple)):
- for u in data['urls']:
- gallery.urls.append(url_model(name=u))
- applied = True
-
- return applied
-
-@hpx.attach("GalleryFS.parse_metadata_file")
-def parse(path, gallery):
- fs = hpx.command.CoreFS(path)
-
- contents = {x: os.path.split(x)[1].lower() for x in fs.contents(corefs=False) if x.lower().endswith(common.filetypes)}
- log.debug(f"Contents for {fs.path}:")
- log.debug(f"{tuple(contents.values())}")
-
- cdata = common.common_data.copy()
-
- for fnames, dtypes in common.filenames.items():
- for fpath, fname in contents.items():
- if fname in fnames:
- log.debug(f"path: {fpath}")
- cdata.update(get_common_data(dtypes, fpath))
- break
-
- return apply_metadata(cdata, gallery)
\ No newline at end of file