diff --git a/.gitignore b/.gitignore index 9811a74..c60a91d 100644 --- a/.gitignore +++ b/.gitignore @@ -13,3 +13,5 @@ /runner/node_modules /runner/.env /runner/.env.local + +/.idea/* diff --git a/composer.json b/composer.json index db5478b..253565b 100644 --- a/composer.json +++ b/composer.json @@ -25,7 +25,8 @@ "symfony/dependency-injection": "^5.4 || ^6.0 || ^7.0", "symfony/http-kernel": "^5.4 || ^6.0 || ^7.0", "terminal42/contao-url-rewrite": "^1.7", - "composer-runtime-api": "^2.1" + "composer-runtime-api": "^2.1", + "emagister/sitemap-php": "^0.1" }, "require-dev": { "bamarni/composer-bin-plugin": "^1.5", diff --git a/config/services.yaml b/config/services.yaml index 55149c2..34b4de4 100644 --- a/config/services.yaml +++ b/config/services.yaml @@ -24,3 +24,12 @@ services: Pdir\ContaoSeoPlugin\EventListener\OutputFrontendTemplateListener: public: true + + Pdir\ContaoSeoPlugin\EventListener\RemoveOldFeedsListener: + public: true + + Pdir\ContaoSeoPlugin\EventListener\RobotsTxtListener: + public: true + + Pdir\ContaoSeoPlugin\Backend\Automator: + public: true diff --git a/contao/config/config.php b/contao/config/config.php index 065b7b1..4af2cda 100644 --- a/contao/config/config.php +++ b/contao/config/config.php @@ -1,5 +1,6 @@ ['tl_error_log'] ]; +$GLOBALS['TL_PURGE']['custom']['sitemap'] = ['callback' => [Automator::class, 'generateSitemapFiles']]; + /* * Permissions */ diff --git a/contao/dca/tl_page.php b/contao/dca/tl_page.php index e003a99..95555cd 100644 --- a/contao/dca/tl_page.php +++ b/contao/dca/tl_page.php @@ -21,12 +21,11 @@ // Add new legend PaletteManipulator::create() ->addLegend('contao_seo_legend', 'meta_legend') - ->addField(['contaoSeoActivateErrorLog', 'contaoSeoActivateIndexNow'], 'contao_seo_legend', PaletteManipulator::POSITION_APPEND) + ->addField(['contaoSeoActivateErrorLog', 'contaoSeoActivateIndexNow', 'contaoSeoActivateSitemap'], 'contao_seo_legend', PaletteManipulator::POSITION_APPEND) ->applyToPalette('root', 'tl_page') 
->applyToPalette('rootfallback', 'tl_page') ; - // Add to routing legend PaletteManipulator::create() ->addField('urlRewriteList', 'routePriority') @@ -42,7 +41,9 @@ $GLOBALS['TL_DCA']['tl_page']['palettes']['__selector__'][] = 'contaoSeoActivateIndexNow'; +$GLOBALS['TL_DCA']['tl_page']['palettes']['__selector__'][] = 'contaoSeoActivateSitemap'; $GLOBALS['TL_DCA']['tl_page']['subpalettes']['contaoSeoActivateIndexNow'] = 'contaoSeoIndexNowEngines,contaoSeoIndexNowKey'; +$GLOBALS['TL_DCA']['tl_page']['subpalettes']['contaoSeoActivateSitemap'] = 'contaoSeoSitemapName,contaoSeoAddSitemapToRobotsTxt'; // add fields $GLOBALS['TL_DCA']['tl_page']['fields']['contaoSeoToolbar'] = [ @@ -89,3 +90,22 @@ 'eval' => ['mandatory'=>false, 'basicEntities'=>true, 'maxlength'=>255, 'tl_class'=>'w50'], 'sql' => "varchar(255) NOT NULL default ''" ]; + +$GLOBALS['TL_DCA']['tl_page']['fields']['contaoSeoActivateSitemap'] = [ + 'inputType' => 'checkbox', + 'eval' => ['doNotCopy'=>true, 'submitOnChange'=>true, 'tl_class' => 'w50 clr'], + 'sql' => ['type' => 'boolean', 'default' => false] +]; + +$GLOBALS['TL_DCA']['tl_page']['fields']['contaoSeoSitemapName'] = [ + 'search' => true, + 'inputType' => 'text', + 'eval' => ['mandatory'=>true, 'minlength' => 1, 'maxlength'=>128, 'tl_class'=>'w50'], + 'sql' => "varchar(128) NOT NULL default ''" +]; + +$GLOBALS['TL_DCA']['tl_page']['fields']['contaoSeoAddSitemapToRobotsTxt'] = [ + 'inputType' => 'checkbox', + 'eval' => ['doNotCopy'=>true, 'submitOnChange'=>true, 'tl_class' => 'w50 clr'], + 'sql' => ['type' => 'boolean', 'default' => false] +]; diff --git a/contao/languages/de/tl_maintenance.php b/contao/languages/de/tl_maintenance.php index 51337dd..e2e72b7 100644 --- a/contao/languages/de/tl_maintenance.php +++ b/contao/languages/de/tl_maintenance.php @@ -3,3 +3,4 @@ declare(strict_types=1); $GLOBALS['TL_LANG']['tl_maintenance_jobs']['error_log'] = ['404 Error Log löschen', 'Leert die Tabelle tl_error_log, in der 404 Fehlermeldungen gespeichert 
werden. Die Daten werden hierdurch endgültig gelöscht.']; +$GLOBALS['TL_LANG']['tl_maintenance_jobs']['sitemap'] = ['SEO Plugin Sitemap neu schreiben', 'Schreibt die SEO Plugin Sitemap-XML-Dateien im Ordner share neu und leert anschließend den Shared-Cache, damit keine ungültigen Links zurückbleiben.']; diff --git a/contao/languages/de/tl_page.php b/contao/languages/de/tl_page.php index 0c62119..4585462 100644 --- a/contao/languages/de/tl_page.php +++ b/contao/languages/de/tl_page.php @@ -14,3 +14,6 @@ $GLOBALS['TL_LANG']['tl_page']['seznam.cz'] = 'Seznam.cz'; $GLOBALS['TL_LANG']['tl_page']['yandex'] = 'Yandex'; $GLOBALS['TL_LANG']['tl_page']['yep'] = 'Yep'; +$GLOBALS['TL_LANG']['tl_page']['contaoSeoActivateSitemap'] = ['Sitemap aktivieren', 'Wähle hier ob das SEO Plugin eine eigene Sitemap erstellen soll.']; +$GLOBALS['TL_LANG']['tl_page']['contaoSeoSitemapName'] = ['Sitemap-Name', 'Gib hier einen Namen für die Sitemap an. Bsp. [SITEMAP-NAME]sitemap.xml bzw. [SITEMAP-NAME]sitemap-index.xml. Die Sitemap-Dateien werden im Ordner share abgelegt.']; +$GLOBALS['TL_LANG']['tl_page']['contaoSeoAddSitemapToRobotsTxt'] = ['Sitemap zur robots.txt hinzufügen', 'Wähle hier ob die Sitemap zur robots.txt hinzugefügt werden soll.']; diff --git a/contao/languages/en/tl_maintenance.php b/contao/languages/en/tl_maintenance.php index 38e3f23..f093ac7 100644 --- a/contao/languages/en/tl_maintenance.php +++ b/contao/languages/en/tl_maintenance.php @@ -3,3 +3,4 @@ declare(strict_types=1); $GLOBALS['TL_LANG']['tl_maintenance_jobs']['error_log'] = ['Delete 404 error log', 'Clears the table tl_error_log, in which 404 error messages are stored. 
The data will be permanently deleted.']; +$GLOBALS['TL_LANG']['tl_maintenance_jobs']['sitemap'] = ['Rewrite SEO Plugin Sitemap', 'Rewrites the SEO Plugin Sitemap XML files in the share folder and then clears the shared cache so that no invalid links are left behind']; diff --git a/contao/languages/en/tl_page.php b/contao/languages/en/tl_page.php index 7a31328..bd12644 100644 --- a/contao/languages/en/tl_page.php +++ b/contao/languages/en/tl_page.php @@ -14,3 +14,9 @@ $GLOBALS['TL_LANG']['tl_page']['seznam.cz'] = 'Seznam.cz'; $GLOBALS['TL_LANG']['tl_page']['yandex'] = 'Yandex'; $GLOBALS['TL_LANG']['tl_page']['yep'] = 'Yep'; +$GLOBALS['TL_LANG']['tl_page']['contaoSeoActivateSitemap'] = ['Sitemap aktivieren', 'Wähle hier ob das SEO Plugin eine eigene Sitemap erstellen soll.']; +$GLOBALS['TL_LANG']['tl_page']['contaoSeoSitemapName'] = ['Sitemap-Name', 'Gib hier einen Namen für die Sitemap an. Bsp. [SITEMAP-NAME]sitemap.xml bzw. [SITEMAP-NAME]sitemap-index.xml.']; +$GLOBALS['TL_LANG']['tl_page']['contaoSeoAddSitemapToRobotsTxt'] = ['Sitemap zur robots.txt hinzufügen', 'Wähle hier ob die Sitemap zur robots.txt hinzugefügt werden soll.']; +$GLOBALS['TL_LANG']['tl_page']['contaoSeoActivateSitemap'] = ['Activate sitemap', 'Select here whether the SEO plugin should create its own sitemap']; +$GLOBALS['TL_LANG']['tl_page']['contaoSeoSitemapName'] = ['Sitemap name', 'Enter a name for the sitemap here. E.g. [SITEMAP-NAME]sitemap.xml or [SITEMAP-NAME]sitemap-index.xml. 
/**
 * Writes the SEO plugin sitemap XML files into the "share" directory below the
 * Contao web directory and finds those files again for the "removeOldFeeds"
 * hook and the robots.txt listener.
 */
class Automator extends System
{
    private PageRegistry $pageRegistry;
    private readonly ContaoAutomator $contaoAutomator;

    public function __construct(PageRegistry $pageRegistry)
    {
        parent::__construct();

        $this->contaoAutomator = new ContaoAutomator();
        $this->pageRegistry = $pageRegistry;
    }

    /**
     * Regenerate the XML files.
     *
     * For every page returned by getRootPages() the page URLs are collected,
     * written to "<web_dir>/share/<contaoSeoSitemapName>…" and a sitemap index
     * is created. Afterwards the page cache is purged and an info entry is
     * logged. Nothing is written, purged or logged when no page is found.
     */
    public function generateSitemapFiles(): void
    {
        $rootPages = self::getRootPages();

        // getRootPages() already logged a warning; iterating over null would
        // only raise a PHP warning and then claim a regeneration that never happened.
        if (null === $rootPages) {
            return;
        }

        $shareDir = System::getContainer()->getParameter('contao.web_dir').'/share';

        foreach ($rootPages as $rootPage) {
            // Truthiness check, consistent with RobotsTxtListener: a strict
            // comparison with the integer 1 fails for '1' and true.
            $loc = ($rootPage->useSSL ? 'https://' : 'http://').$rootPage->dns;

            // The collected URLs are absolute (getAbsoluteUrl()), hence the
            // empty constructor argument instead of $loc.
            $sitemap = new Sitemap('');
            $sitemap->setPath($shareDir.'/'.$rootPage->contaoSeoSitemapName);

            foreach ($this->getUrls($rootPage) as $url) {
                // TODO: pass priority, change frequency and last modification date
                $sitemap->addItem($url);
            }

            // The index is always written, regardless of the number of URLs
            $sitemap->createSitemapIndex($loc.'/share/'.$rootPage->contaoSeoSitemapName, 'Today');
        }

        // Also empty the shared cache so there are no links to deleted files
        $this->contaoAutomator->purgePageCache();

        System::getContainer()->get('monolog.logger.contao.cron')->info('Regenerated the SEO Plugin sitemap XML file(s)');
    }

    /**
     * Finds all pages that have "contaoSeoActivateSitemap" set and a non-empty
     * "dns" value. Logs a warning when there are none.
     *
     * @return mixed the result of PageModel::findBy(); null when nothing matched
     */
    public static function getRootPages()
    {
        $pageModel = System::getContainer()->get('contao.framework')->getAdapter(PageModel::class);
        $rootPages = $pageModel->findBy(['contaoSeoActivateSitemap=?', 'dns!=?'], [1, '']);

        if (null === $rootPages) {
            System::getContainer()->get('monolog.logger.contao.cron')->warning('No active root pages found for sitemap generation.');
        }

        return $rootPages;
    }

    /**
     * Returns the de-duplicated URLs below the given root page after they have
     * been passed through the "contaoSeoGetSearchablePages" hook.
     *
     * @return array<string>
     */
    private function getUrls(PageModel $rootPage): array
    {
        $pages = $this->getPageAndArticleUrls((int) $rootPage->id);

        return array_unique($this->callLegacyHook($rootPage, $pages));
    }

    /**
     * Runs every "contaoSeoGetSearchablePages" callback. Each callback receives
     * the current URL list, the root page ID, true and the root page language;
     * its return value replaces the URL list.
     *
     * @param array<string> $pages
     *
     * @return array<string>
     */
    private function callLegacyHook(PageModel $rootPage, array $pages): array
    {
        $hooks = $GLOBALS['TL_HOOKS']['contaoSeoGetSearchablePages'] ?? null;

        // HOOK: take additional pages
        if (!\is_array($hooks)) {
            return $pages;
        }

        $systemAdapter = System::getContainer()->get('contao.framework')->getAdapter(System::class);

        foreach ($hooks as $callback) {
            $pages = $systemAdapter->importStatic($callback[0])->{$callback[1]}($pages, $rootPage->id, true, $rootPage->language);
        }

        return $pages;
    }

    /**
     * Recursively collects the absolute URLs of all pages below the given page
     * and of their published articles with teaser.
     *
     * @return array<string>
     */
    private function getPageAndArticleUrls(int $parentPageId): array
    {
        $pageModelAdapter = System::getContainer()->get('contao.framework')->getAdapter(PageModel::class);

        // Since the publication status of a page is not inherited by its child
        // pages, we have to use findByPid() instead of findPublishedByPid() and
        // filter out unpublished pages in the foreach loop (see #2217)
        $pageModels = $pageModelAdapter->findByPid($parentPageId, ['order' => 'sorting']);

        if (null === $pageModels) {
            return [];
        }

        $articleModelAdapter = System::getContainer()->get('contao.framework')->getAdapter(ArticleModel::class);

        $result = [];

        // Recursively walk through all subpages
        foreach ($pageModels as $pageModel) {
            // Load details in order to inherit permission settings (see #5556)
            $pageModel->loadDetails();

            // A protected page the current user may not access is skipped
            // together with its whole subtree
            if ($pageModel->protected && !System::getContainer()->get('security.authorization_checker')->isGranted(ContaoCorePermissions::MEMBER_IN_GROUPS, $pageModel->groups)) {
                continue;
            }

            $isPublished = $pageModel->published && (!$pageModel->start || $pageModel->start <= time()) && (!$pageModel->stop || $pageModel->stop > time());

            if (
                $isPublished
                && !$pageModel->requireItem
                && 'noindex,nofollow' !== $pageModel->robots
                && $this->pageRegistry->supportsContentComposition($pageModel)
                && $this->pageRegistry->isRoutable($pageModel)
                && 'html' === $this->pageRegistry->getRoute($pageModel)->getDefault('_format')
            ) {
                try {
                    $urls = [$pageModel->getAbsoluteUrl()];

                    // Get articles with teaser
                    if (null !== ($articleModels = $articleModelAdapter->findPublishedWithTeaserByPid($pageModel->id, ['ignoreFePreview' => true]))) {
                        foreach ($articleModels as $articleModel) {
                            $urls[] = $pageModel->getAbsoluteUrl('/articles/'.($articleModel->alias ?: $articleModel->id));
                        }
                    }

                    $result[] = $urls;
                } catch (\Symfony\Component\Routing\Exception\ExceptionInterface $exception) {
                    // Skip URL for this page but generate child pages. The
                    // interface is fully qualified because it is not imported;
                    // an unqualified name would resolve into this namespace
                    // and never match.
                }
            }

            $result[] = $this->getPageAndArticleUrls((int) $pageModel->id);
        }

        return array_merge(...$result);
    }

    /**
     * Lists the sitemap XML files in the "share" directory whose name starts
     * with "<contaoSeoSitemapName>sitemap" of the given root page, or of every
     * page returned by getRootPages() when no root page is passed.
     *
     * @param bool            $withExtension false: return the file names without extension
     *                                       (used by the removeOldFeeds listener);
     *                                       true: return the file names with extension
     *                                       (used by the robots.txt listener). In this mode the
     *                                       first sitemap index file found is returned on its own.
     * @param PageModel|false $rootPage      restrict the search to this root page
     *
     * @return array<string>
     */
    public static function getSitemapXmlFiles($withExtension = false, $rootPage = false): array
    {
        $rootPages = $rootPage ? [$rootPage] : self::getRootPages();

        if (null === $rootPages) {
            return [];
        }

        $shareDir = System::getContainer()->getParameter('contao.web_dir').'/share';

        // Nothing to find if the directory has not been created yet
        if (!\is_dir($shareDir)) {
            return [];
        }

        // The directory content does not depend on the root page, so scan once
        $files = Folder::scan($shareDir);
        $xmlFiles = [];

        foreach ($rootPages as $page) {
            // generateSitemapFiles() uses the sitemap name as file name prefix.
            // Matching the prefix instead of any substring keeps the files of
            // another root page (e.g. "ba…" vs. "a…") out of the result.
            $prefix = $page->contaoSeoSitemapName.'sitemap';

            foreach ($files as $file) {
                if (\is_dir($shareDir.'/'.$file)) {
                    continue; // see #6652
                }

                if ('xml' !== \strtolower(\pathinfo($file, PATHINFO_EXTENSION)) || !\str_starts_with($file, $prefix)) {
                    continue;
                }

                // For removeOldFeeds
                if (!$withExtension) {
                    $xmlFiles[] = \pathinfo($file, PATHINFO_FILENAME);

                    continue;
                }

                // For robots.txt if we found a sitemap index file
                if (\str_starts_with($file, $prefix.'-index')) {
                    return [$file];
                }

                // For robots.txt if there is only a single sitemap file
                $xmlFiles[] = $file;
            }
        }

        return \array_values(\array_unique($xmlFiles));
    }
}
/**
 * Listener for the "removeOldFeeds" hook. Returns the names (without file
 * extension) of the SEO plugin sitemap files found in the share directory.
 *
 * NOTE(review): presumably Contao keeps the files named here when it purges
 * old feed files — confirm against the hook documentation.
 */
#[AsHook('removeOldFeeds')]
class RemoveOldFeedsListener
{
    /**
     * @return array<string> sitemap file names without extension
     */
    public function __invoke(): array
    {
        return Automator::getSitemapXmlFiles();
    }
}

/**
 * Adds a "Sitemap" directive for the SEO plugin sitemap file(s) of the current
 * root page to the robots.txt, if the root page has
 * "contaoSeoAddSitemapToRobotsTxt" enabled.
 */
#[AsEventListener(ContaoCoreEvents::ROBOTS_TXT)]
class RobotsTxtListener
{
    public function __invoke(RobotsTxtEvent $event): void
    {
        $file = $event->getFile();
        $rootPage = $event->getRootPage();

        if (!$rootPage->contaoSeoAddSitemapToRobotsTxt) {
            return;
        }

        $scheme = $rootPage->useSSL ? 'https://' : 'http://';

        // Fall back to the requested host if the root page has no domain set
        $host = $rootPage->dns ?: $event->getRequest()->server->get('HTTP_HOST');

        foreach (Automator::getSitemapXmlFiles(true, $rootPage) as $xmlFile) {
            // The file name is passed as an argument and not concatenated into
            // the format string, so a "%" in the name cannot break sprintf()
            $sitemap = \sprintf('%s%s/share/%s', $scheme, $host, $xmlFile);

            $file->getNonGroupDirectives()->add(new Directive('Sitemap', $sitemap));
        }
    }
}