diff --git a/composer.json b/composer.json index e696c132..2cfe69c7 100644 --- a/composer.json +++ b/composer.json @@ -79,7 +79,9 @@ "ext-imap": "*", "tatevikgr/rss-feed": "dev-main", "ext-pdo": "*", - "ezyang/htmlpurifier": "^4.19" + "ezyang/htmlpurifier": "^4.19", + "ext-libxml": "*", + "ext-gd": "*" }, "require-dev": { "phpunit/phpunit": "^9.5", diff --git a/config/parameters.yml.dist b/config/parameters.yml.dist index 41c9a20b..cacec120 100644 --- a/config/parameters.yml.dist +++ b/config/parameters.yml.dist @@ -25,6 +25,12 @@ parameters: env(DATABASE_PREFIX): 'phplist_' list_table_prefix: '%%env(LIST_TABLE_PREFIX)%%' env(LIST_TABLE_PREFIX): 'listattr_' + app.dev_version: '%%env(APP_DEV_VERSION)%%' + env(APP_DEV_VERSION): 0 + app.dev_email: '%%env(APP_DEV_EMAIL)%%' + env(APP_DEV_EMAIL): 'dev@dev.com' + app.powered_by_phplist: '%%env(APP_POWERED_BY_PHPLIST)%%' + env(APP_POWERED_BY_PHPLIST): 0 # Email configuration app.mailer_from: '%%env(MAILER_FROM)%%' @@ -89,3 +95,14 @@ parameters: env(MESSAGING_MAX_PROCESS_TIME): '600' messaging.max_mail_size: '%%env(MAX_MAILSIZE)%%' env(MAX_MAILSIZE): '209715200' + messaging.default_message_age: '%%env(DEFAULT_MESSAGEAGE)%%' + env(DEFAULT_MESSAGEAGE): '691200' + messaging.use_manual_text_part: '%%env(USE_MANUAL_TEXT_PART)%%' + env(USE_MANUAL_TEXT_PART): 0 + messaging.blacklist_grace_time: '%%env(MESSAGING_BLACKLIST_GRACE_TIME)%%' + env(MESSAGING_BLACKLIST_GRACE_TIME): + + phplist.upload_images_dir: '%%env(PHPLIST_UPLOADIMAGES_DIR)%%' + env(PHPLIST_UPLOADIMAGES_DIR): 'images' + phplist.public_schema: '%%env(PUBLIC_SCHEMA)%%' + env(PUBLIC_SCHEMA): 'http' diff --git a/src/Domain/Analytics/Service/LinkTrackService.php b/src/Domain/Analytics/Service/LinkTrackService.php index 902092f6..0b3a8c5e 100644 --- a/src/Domain/Analytics/Service/LinkTrackService.php +++ b/src/Domain/Analytics/Service/LinkTrackService.php @@ -8,8 +8,7 @@ use PhpList\Core\Domain\Analytics\Exception\MissingMessageIdException; use PhpList\Core\Domain\Analytics\Model\LinkTrack; use PhpList\Core\Domain\Analytics\Repository\LinkTrackRepository; -use PhpList\Core\Domain\Messaging\Model\Message; -use PhpList\Core\Domain\Messaging\Model\Message\MessageContent; +use PhpList\Core\Domain\Messaging\Model\Dto\MessagePrecacheDto; class LinkTrackService { @@ -39,7 +38,7 @@ public function isExtractAndSaveLinksApplicable(): bool * @return LinkTrack[] The saved LinkTrack entities * @throws MissingMessageIdException */ - public function extractAndSaveLinks(MessageContent $content, int $userId, ?int $messageId = null): array + public function extractAndSaveLinks(MessagePrecacheDto $content, int $userId, ?int $messageId = null): array { if (!$this->isExtractAndSaveLinksApplicable()) { return []; @@ -49,10 +48,10 @@ public function extractAndSaveLinks(MessageContent $content, int $userId, ?int $ throw new MissingMessageIdException(); } - $links = $this->extractLinksFromHtml($content->getText() ?? ''); + $links = $this->extractLinksFromHtml($content->content ?? ''); - if ($content->getFooter() !== null) { - $links = array_merge($links, $this->extractLinksFromHtml($content->getFooter())); + if ($content->htmlFooter) { + $links = array_merge($links, $this->extractLinksFromHtml($content->htmlFooter)); } $links = array_unique($links); diff --git a/src/Domain/Common/Html2Text.php b/src/Domain/Common/Html2Text.php new file mode 100644 index 00000000..93705748 --- /dev/null +++ b/src/Domain/Common/Html2Text.php @@ -0,0 +1,85 @@ +]*>(.*?)<\/script\s*>/is", '', $text); + $text = preg_replace("/]*>(.*?)<\/style\s*>/is", '', $text); + + $text = preg_replace( + "/]*href=([\"\'])(.*)\\1[^>]*>(.*)<\/a>/Umis", + "[URLTEXT]\\3[ENDURLTEXT][LINK]\\2[ENDLINK]\n", + $text + ); + $text = preg_replace("/(.*?)<\/b\s*>/is", '*\\1*', $text); + $text = preg_replace("/(.*?)<\/h[\d]\s*>/is", "**\\1**\n", $text); + $text = preg_replace("/(.*?)<\/i\s*>/is", '/\\1/', $text); + $text = preg_replace("/<\/tr\s*?>/i", "<\/tr>\n\n", $text); + $text = preg_replace("/<\/p\s*?>/i", "<\/p>\n\n", $text); + $text = preg_replace('/]*?>/i', "
\n", $text); + $text = preg_replace("/]*?\/>/i", "\n", $text); + $text = preg_replace('/ $fullmatch) { + $linktext = $links[1][$matchindex]; + $linkurl = $links[2][$matchindex]; + // check if the text linked is a repetition of the URL + if (trim($linktext) == trim($linkurl) || + 'https://'.trim($linktext) == trim($linkurl) || + 'http://'.trim($linktext) == trim($linkurl) + ) { + $linkreplace = $linkurl; + } else { + //# if link is an anchor only, take it out + if (strpos($linkurl, '#') === 0) { + $linkreplace = $linktext; + } else { + $linkreplace = $linktext.' <'.$linkurl.'>'; + } + } + $text = str_replace($fullmatch, $linkreplace, $text); + } + $text = preg_replace( + "/]*>(.*?)<\/a>/is", + '[URLTEXT]\\2[ENDURLTEXT][LINK]\\1[ENDLINK]', + $text, + 500 + ); + + $text = html_entity_decode($text, ENT_QUOTES | ENT_HTML5, 'UTF-8'); + + $text = preg_replace('/###NL###/', "\n", $text); + $text = preg_replace("/\n /", "\n", $text); + $text = preg_replace("/\t/", ' ', $text); + + // reduce whitespace + while (preg_match('/ /', $text)) { + $text = preg_replace('/ /', ' ', $text); + } + while (preg_match("/\n\s*\n\s*\n/", $text)) { + $text = preg_replace("/\n\s*\n\s*\n/", "\n\n", $text); + } + $ww = $this->configProvider->getValue(ConfigOption::WordWrap) ?? self::WORD_WRAP; + + return wordwrap($text, $ww); + } +} diff --git a/src/Domain/Common/HtmlUrlRewriter.php b/src/Domain/Common/HtmlUrlRewriter.php new file mode 100644 index 00000000..bba0b46e --- /dev/null +++ b/src/Domain/Common/HtmlUrlRewriter.php @@ -0,0 +1,190 @@ +
' . $html . '
'; + $dom->loadHTML($wrapped, LIBXML_HTML_NOIMPLIED | LIBXML_HTML_NODEFDTD); + + $xpath = new DOMXPath($dom); + + // Attributes to rewrite + $attrMap = [ + '//*[@src]' => 'src', + '//*[@href]' => 'href', + '//*[@action]' => 'action', + '//*[@background]' => 'background', + ]; + + foreach ($attrMap as $query => $attr) { + foreach ($xpath->query($query) as $node) { + /** @var DOMElement $node */ + $val = $node->getAttribute($attr); + $node->setAttribute($attr, $this->absolutizeUrl($val, $baseUrl)); + } + } + + // srcset needs special handling (multiple candidates) + foreach ($xpath->query('//*[@srcset]') as $node) { + /** @var DOMElement $node */ + $node->setAttribute('srcset', $this->rewriteSrcset($node->getAttribute('srcset'), $baseUrl)); + } + + // 2) Rewrite inline