From 4243028e4cacfa1ffb8795cc33f8aeded37f147e Mon Sep 17 00:00:00 2001 From: Arnt Gulbrandsen Date: Wed, 4 Mar 2026 21:51:46 +0100 Subject: [PATCH 1/3] Detect mixed scripts in domain in a more robust manner. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous code mishandled extended Latin such as münchen.de and also did not detect combinations such as Greek/Cyrillic. This solves both the false negatives and the false positives. As of this commit, links to grå.org get the underline treatment. --- .../thoughtcrime/securesms/util/LinkUtil.kt | 32 ++++++++++++++++--- .../securesms/util/LinkUtilTest_isLegal.java | 8 ++++- .../util/LinkUtilTest_isValidPreviewUrl.kt | 5 ++- 3 files changed, 39 insertions(+), 6 deletions(-) diff --git a/app/src/main/java/org/thoughtcrime/securesms/util/LinkUtil.kt b/app/src/main/java/org/thoughtcrime/securesms/util/LinkUtil.kt index 142af786a94..b7fcc104853 100644 --- a/app/src/main/java/org/thoughtcrime/securesms/util/LinkUtil.kt +++ b/app/src/main/java/org/thoughtcrime/securesms/util/LinkUtil.kt @@ -12,8 +12,6 @@ import java.util.regex.Pattern */ object LinkUtil { private val DOMAIN_PATTERN = Pattern.compile("^(https?://)?([^/]+).*$") - private val ALL_ASCII_PATTERN = Pattern.compile("^[\\x00-\\x7F]*$") - private val ALL_NON_ASCII_PATTERN = Pattern.compile("^[^\\x00-\\x7F]*$") private val ILLEGAL_CHARACTERS_PATTERN = Pattern.compile("[\u202C\u202D\u202E\u2500-\u25FF]") private val ILLEGAL_PERIODS_PATTERN = Pattern.compile("(\\.{2,}|…)") @@ -84,13 +82,39 @@ object LinkUtil { return LegalCharactersResult(false) } - val cleanedDomain = domain.replace("\\.".toRegex(), "") return LegalCharactersResult( - isLegal = ALL_ASCII_PATTERN.matcher(cleanedDomain).matches() || ALL_NON_ASCII_PATTERN.matcher(cleanedDomain).matches(), + isLegal = !mixesScripts(domain), domain = domain ) } + /** + * Returns true if [str] contains letters from more than one Unicode script, + * ignoring characters with 
script COMMON or INHERITED (digits, punctuation, etc.). + * Used to detect potential homograph attacks in domain names: a domain that mixes, + * say, Cyrillic and Latin letters is suspicious, while an IDN label like "grå" + * that uses only Latin letters (including extended Latin like å) is fine. + */ + private fun mixesScripts(str: String): Boolean { + var firstScript: Character.UnicodeScript? = null + var i = 0 + while (i < str.length) { + val cp = str.codePointAt(i) + if (Character.isLetter(cp)) { + val script = Character.UnicodeScript.of(cp) + if (script != Character.UnicodeScript.COMMON && script != Character.UnicodeScript.INHERITED) { + if (firstScript == null) { + firstScript = script + } else if (script != firstScript) { + return true + } + } + } + i += Character.charCount(cp) + } + return false + } + @JvmStatic private fun isValidURI(linkUri: String?): Boolean { return if (linkUri == null) { diff --git a/app/src/test/java/org/thoughtcrime/securesms/util/LinkUtilTest_isLegal.java b/app/src/test/java/org/thoughtcrime/securesms/util/LinkUtilTest_isLegal.java index e1b30d5b6a7..45e4a48c1dc 100644 --- a/app/src/test/java/org/thoughtcrime/securesms/util/LinkUtilTest_isLegal.java +++ b/app/src/test/java/org/thoughtcrime/securesms/util/LinkUtilTest_isLegal.java @@ -29,7 +29,7 @@ public static Collection data() { { "https://abcdefg.i2p", true }, { "http://кц.com", false }, { "кц.com", false }, - { "http://asĸ.com", false }, + { "http://asĸ.com", true }, // ĸ (U+0138) is Unicode script LATIN { "http://foo.кц.рф", false }, { "кц.рф\u202C", false }, { "кц.рф\u202D", false }, @@ -47,6 +47,12 @@ public static Collection data() { { "localhost", true }, { "https://localhost", true }, { "cool.test", true }, + { "grå.org", true }, // å is Latin script + { "münchen.de", true }, // ü is Latin script + { "慕田峪长城.网址", true }, // Great Wall site + // Мышкин is the idiot in Dostoyevsky's book. 
+ { "Мышкин.рф", true }, // Cyrillic к U+043A + { "Мышκин.рф", false }, // Greek κ U+03BA { "https://github.com/signalapp/Signal-Android/compare/v6.23.2...v6.23.3", true } }); } diff --git a/app/src/test/java/org/thoughtcrime/securesms/util/LinkUtilTest_isValidPreviewUrl.kt b/app/src/test/java/org/thoughtcrime/securesms/util/LinkUtilTest_isValidPreviewUrl.kt index fd5dd777fb7..e2bdbc25be0 100644 --- a/app/src/test/java/org/thoughtcrime/securesms/util/LinkUtilTest_isValidPreviewUrl.kt +++ b/app/src/test/java/org/thoughtcrime/securesms/util/LinkUtilTest_isValidPreviewUrl.kt @@ -52,7 +52,10 @@ class LinkUtilTest_isValidPreviewUrl(private val input: String, private val outp arrayOf("https://cool.invalid.com", true), arrayOf("https://cool.localhost.signal.org", true), arrayOf("https://cool.test.blarg.gov", true), - arrayOf("https://github.com/signalapp/Signal-Android/compare/v6.23.2...v6.23.3", true) + arrayOf("https://github.com/signalapp/Signal-Android/compare/v6.23.2...v6.23.3", true), + arrayOf("https://grå.org", true), + arrayOf("https://grå.org/some/path", true), + arrayOf("http://grå.org", false) ) } } From fe8162b9c8060b74798593bf5d54a7871a2df9fe Mon Sep 17 00:00:00 2001 From: Arnt Gulbrandsen Date: Thu, 5 Mar 2026 17:11:09 +0100 Subject: [PATCH 2/3] Show pasted/shared URLs using UTF8 rather than xn-- blobs. As of this commit, a link shared using the share sheet of a browser, or cut and pasted, is displayed in the human-readable form, provided that the domain name doesn't mix scripts. Note that there are two flows for cut and paste: a regular paste, and one that looks as if a single key press delivers a long string. This handles both. 
--- .../components/emoji/EmojiEditText.java | 45 ++++++ .../securesms/sharing/v2/ShareRepository.kt | 8 +- .../thoughtcrime/securesms/util/LinkUtil.kt | 145 +++++++++++++++++- 3 files changed, 196 insertions(+), 2 deletions(-) diff --git a/app/src/main/java/org/thoughtcrime/securesms/components/emoji/EmojiEditText.java b/app/src/main/java/org/thoughtcrime/securesms/components/emoji/EmojiEditText.java index 6fd9023e011..1c74cd1b338 100644 --- a/app/src/main/java/org/thoughtcrime/securesms/components/emoji/EmojiEditText.java +++ b/app/src/main/java/org/thoughtcrime/securesms/components/emoji/EmojiEditText.java @@ -9,6 +9,9 @@ import android.text.InputFilter; import android.text.TextUtils; import android.util.AttributeSet; +import android.view.inputmethod.EditorInfo; +import android.view.inputmethod.InputConnection; +import android.view.inputmethod.InputConnectionWrapper; import androidx.annotation.NonNull; import androidx.annotation.Nullable; @@ -18,6 +21,7 @@ import org.thoughtcrime.securesms.components.emoji.EmojiProvider.EmojiDrawable; import org.thoughtcrime.securesms.keyvalue.SignalStore; import org.thoughtcrime.securesms.util.EditTextExtensionsKt; +import org.thoughtcrime.securesms.util.LinkUtil; import org.thoughtcrime.securesms.util.ServiceUtil; import org.thoughtcrime.securesms.util.TextSecurePreferences; import org.signal.core.util.Util; @@ -61,6 +65,27 @@ public EmojiEditText(Context context, AttributeSet attrs, int defStyleAttr) { } } + @Override + public InputConnection onCreateInputConnection(EditorInfo outAttrs) { + InputConnection base = super.onCreateInputConnection(outAttrs); + if (base == null) return null; + return new InputConnectionWrapper(base, true) { + @Override + public boolean commitText(CharSequence text, int newCursorPosition) { + if (text != null) { + String trimmed = text.toString().trim(); + if (LinkUtil.isLegalUrl(trimmed)) { + String display = LinkUtil.toDisplayUrl(trimmed); + if (!display.equals(trimmed)) { + return 
super.commitText(display, newCursorPosition); + } + } + } + return super.commitText(text, newCursorPosition); + } + }; + } + public void insertEmoji(String emoji) { final int start = getSelectionStart(); final int end = getSelectionEnd(); @@ -121,6 +146,17 @@ public boolean onTextContextMenuItem(int id) { if (TextUtils.equals(Util.COPY_LABEL, label) && shouldPersistSignalStylingWhenPasting()) { return super.onTextContextMenuItem(id); } else { + CharSequence pasteText = getTextFromClipData(clipData); + if (pasteText != null) { + String trimmed = pasteText.toString().trim(); + if (LinkUtil.isLegalUrl(trimmed)) { + String display = LinkUtil.toDisplayUrl(trimmed); + if (!display.equals(trimmed)) { + pasteUrlDisplay(display); + return true; + } + } + } return super.onTextContextMenuItem(android.R.id.pasteAsPlainText); } } @@ -140,6 +176,15 @@ public boolean onTextContextMenuItem(int id) { return super.onTextContextMenuItem(id); } + private void pasteUrlDisplay(@NonNull String display) { + if (getText() == null) return; + int start = Math.max(0, getSelectionStart()); + int end = Math.max(0, getSelectionEnd()); + if (start > end) { int tmp = start; start = end; end = tmp; } + getText().replace(start, end, display); + setSelection(start + display.length()); + } + private @Nullable CharSequence getTextFromClipData(@Nullable ClipData data) { if (data != null && data.getItemCount() > 0) { return data.getItemAt(0).coerceToText(getContext()); diff --git a/app/src/main/java/org/thoughtcrime/securesms/sharing/v2/ShareRepository.kt b/app/src/main/java/org/thoughtcrime/securesms/sharing/v2/ShareRepository.kt index 6a382062a54..8e7a6c7a4b4 100644 --- a/app/src/main/java/org/thoughtcrime/securesms/sharing/v2/ShareRepository.kt +++ b/app/src/main/java/org/thoughtcrime/securesms/sharing/v2/ShareRepository.kt @@ -10,6 +10,7 @@ import io.reactivex.rxjava3.schedulers.Schedulers import org.signal.core.models.media.Media import org.signal.core.util.logging.Log import 
org.thoughtcrime.securesms.providers.BlobProvider +import org.thoughtcrime.securesms.util.LinkUtil import org.thoughtcrime.securesms.util.MediaUtil import org.thoughtcrime.securesms.util.RemoteConfig import org.thoughtcrime.securesms.util.UriUtil @@ -24,7 +25,7 @@ class ShareRepository(context: Context) { return when (unresolvedShareData) { is UnresolvedShareData.ExternalMultiShare -> Single.fromCallable { resolve(unresolvedShareData) } is UnresolvedShareData.ExternalSingleShare -> Single.fromCallable { resolve(unresolvedShareData) } - is UnresolvedShareData.ExternalPrimitiveShare -> Single.just(ResolvedShareData.Primitive(unresolvedShareData.text)) + is UnresolvedShareData.ExternalPrimitiveShare -> Single.just(ResolvedShareData.Primitive(prettifyIfUrl(unresolvedShareData.text))) }.subscribeOn(Schedulers.io()) } @@ -130,6 +131,11 @@ class ShareRepository(context: Context) { companion object { private val TAG = Log.tag(ShareRepository::class.java) + private fun prettifyIfUrl(text: CharSequence): CharSequence { + val trimmed = text.toString().trim() + return if (LinkUtil.isLegalUrl(trimmed)) LinkUtil.toDisplayUrl(trimmed) else text + } + private fun getMimeType(context: Context, uri: Uri, mimeType: String?, fileExtension: String? 
= null): String { var updatedMimeType = MediaUtil.getMimeType(context, uri, fileExtension) if (updatedMimeType == null) { diff --git a/app/src/main/java/org/thoughtcrime/securesms/util/LinkUtil.kt b/app/src/main/java/org/thoughtcrime/securesms/util/LinkUtil.kt index b7fcc104853..078c417efab 100644 --- a/app/src/main/java/org/thoughtcrime/securesms/util/LinkUtil.kt +++ b/app/src/main/java/org/thoughtcrime/securesms/util/LinkUtil.kt @@ -2,8 +2,13 @@ package org.thoughtcrime.securesms.util import okhttp3.HttpUrl.Companion.toHttpUrlOrNull import org.thoughtcrime.securesms.stickers.StickerUrl +import java.io.ByteArrayOutputStream +import java.net.IDN import java.net.URI import java.net.URISyntaxException +import java.nio.ByteBuffer +import java.nio.charset.CharacterCodingException +import java.nio.charset.CodingErrorAction import java.util.Objects import java.util.regex.Pattern @@ -14,7 +19,6 @@ object LinkUtil { private val DOMAIN_PATTERN = Pattern.compile("^(https?://)?([^/]+).*$") private val ILLEGAL_CHARACTERS_PATTERN = Pattern.compile("[\u202C\u202D\u202E\u2500-\u25FF]") private val ILLEGAL_PERIODS_PATTERN = Pattern.compile("(\\.{2,}|…)") - private val INVALID_DOMAINS = listOf("example", "example\\.com", "example\\.net", "example\\.org", "i2p", "invalid", "localhost", "onion", "test") private val INVALID_DOMAINS_REGEX: Regex = Regex("^(.+\\.)?(${INVALID_DOMAINS.joinToString("|")})\\.?\$") @@ -115,6 +119,145 @@ object LinkUtil { return false } + /** + * Converts a URL to a human-readable display form: + * 1. ACE/punycode domain labels are decoded to Unicode when the decoded domain passes [isLegalUrl]. + * 2. Percent-encoded path bytes are decoded when they represent ASCII letters, ASCII digits, + * hyphens, or sequences of UTF-8 bytes that decode to Unicode letters or digits. + * All other percent-encoded bytes (spaces, slashes, control chars, …) are left as-is. 
+ */ + @JvmStatic + fun toDisplayUrl(url: String): String { + return try { + val uri = URI(url) + val host = uri.host ?: return url + + val unicodeHost = IDN.toUnicode(host) + val displayHost = if (isLegalUrl(unicodeHost)) unicodeHost else host + val niceRawPath = decodeUrlSafeChars(uri.rawPath ?: "") + + buildString { + if (uri.scheme != null) append("${uri.scheme}://") + if (uri.rawUserInfo != null) append("${uri.rawUserInfo}@") + append(displayHost) + if (uri.port != -1) append(":${uri.port}") + append(niceRawPath) + if (uri.rawQuery != null) append("?${uri.rawQuery}") + if (uri.rawFragment != null) append("#${uri.rawFragment}") + } + } catch (e: Exception) { + url + } + } + + /** + * Decodes percent-encoded byte sequences that represent ASCII letters, ASCII digits, hyphens, + * or multi-byte UTF-8 sequences whose decoded Unicode code point is a letter or digit. + * All other percent-encoded sequences are left unchanged. + * + * If fully decoding all percent-encoded bytes would not yield valid UTF-8, the string is + * returned unchanged — partial decoding would produce misleading output (e.g. a bare lead + * byte next to a decoded ASCII character that happened to share a code unit with a + * continuation byte). 
+ */ + private fun decodeUrlSafeChars(encoded: String): String { + if (!encoded.contains('%')) return encoded + if (!isFullyDecodedUtf8Valid(encoded)) return encoded + val sb = StringBuilder(encoded.length) + var i = 0 + while (i < encoded.length) { + val c = encoded[i] + if (c != '%' || i + 2 >= encoded.length) { + sb.append(c) + i++ + continue + } + val firstHex = encoded.substring(i + 1, i + 3).toIntOrNull(16) + if (firstHex == null) { + sb.append(c) + i++ + continue + } + val firstByte = firstHex and 0xFF + val cpByteCount = when { + firstByte and 0x80 == 0 -> 1 // 0xxxxxxx ASCII + firstByte and 0xE0 == 0xC0 -> 2 // 110xxxxx 2-byte UTF-8 + firstByte and 0xF0 == 0xE0 -> 3 // 1110xxxx 3-byte UTF-8 + firstByte and 0xF8 == 0xF0 -> 4 // 11110xxx 4-byte UTF-8 + else -> 0 // continuation or invalid lead byte + } + if (cpByteCount <= 0) { + sb.append(encoded, i, i + 3) + i += 3 + continue + } + // Collect cpByteCount consecutive %XX tokens. + val rawTokens = ArrayList(cpByteCount) + val rawBytes = ArrayList(cpByteCount) + var j = i + var ok = true + for (k in 0 until cpByteCount) { + if (j + 2 >= encoded.length || encoded[j] != '%') { ok = false; break } + val hex = encoded.substring(j + 1, j + 3) + val bInt = hex.toIntOrNull(16) + if (bInt == null) { ok = false; break } + if (k > 0 && (bInt and 0xC0 != 0x80)) { ok = false; break } // must be continuation byte + rawTokens.add(encoded.substring(j, j + 3)) + rawBytes.add(bInt.toByte()) + j += 3 + } + if (!ok || rawBytes.size != cpByteCount) { + // Could not assemble a complete code point — emit only the first %XX raw. 
+ sb.append(encoded, i, i + 3) + i += 3 + continue + } + val byteArray = rawBytes.toByteArray() + val decoded = String(byteArray, Charsets.UTF_8) + val cp = decoded.codePointAt(0) + if (cp != 0xFFFD && (Character.isLetter(cp) || Character.isDigit(cp) || cp == '-'.code)) { + sb.appendCodePoint(cp) + } else { + sb.append(rawTokens.joinToString("")) + } + i = j + } + return sb.toString() + } + + /** + * Returns true if decoding every percent-encoded byte sequence in [encoded] would yield a + * byte stream that is valid UTF-8. Literal (non-encoded) characters are already valid Unicode + * and always contribute valid UTF-8 bytes. Percent sequences with invalid hex digits are + * treated as literal '%' characters. + */ + private fun isFullyDecodedUtf8Valid(encoded: String): Boolean { + val buf = ByteArrayOutputStream(encoded.length) + var i = 0 + while (i < encoded.length) { + if (encoded[i] == '%' && i + 2 < encoded.length) { + val hex = encoded.substring(i + 1, i + 3).toIntOrNull(16) + if (hex != null) { + buf.write(hex) + i += 3 + continue + } + } + val cp = encoded.codePointAt(i) + buf.write(String(Character.toChars(cp)).toByteArray(Charsets.UTF_8)) + i += Character.charCount(cp) + } + return try { + Charsets.UTF_8.newDecoder() + .onMalformedInput(CodingErrorAction.REPORT) + .onUnmappableCharacter(CodingErrorAction.REPORT) + .decode(ByteBuffer.wrap(buf.toByteArray())) + true + } catch (_: CharacterCodingException) { + false + } + } + @JvmStatic private fun isValidURI(linkUri: String?): Boolean { return if (linkUri == null) { From ef34d3c17d507714a5f4c5f77a513032de0c432e Mon Sep 17 00:00:00 2001 From: Arnt Gulbrandsen Date: Thu, 5 Mar 2026 17:20:21 +0100 Subject: [PATCH 3/3] Show the human-readable domain in the preview pane, not the xn-- blob. 
--- .../securesms/components/LinkPreviewView.java | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/app/src/main/java/org/thoughtcrime/securesms/components/LinkPreviewView.java b/app/src/main/java/org/thoughtcrime/securesms/components/LinkPreviewView.java index 827cbb9eabc..6c7315ec25d 100644 --- a/app/src/main/java/org/thoughtcrime/securesms/components/LinkPreviewView.java +++ b/app/src/main/java/org/thoughtcrime/securesms/components/LinkPreviewView.java @@ -30,8 +30,10 @@ import org.thoughtcrime.securesms.mms.ImageSlide; import org.thoughtcrime.securesms.mms.SlidesClickedListener; import org.signal.core.util.Util; +import org.thoughtcrime.securesms.util.LinkUtil; import org.thoughtcrime.securesms.util.ViewUtil; +import java.net.IDN; import java.text.DateFormat; import java.text.SimpleDateFormat; import java.util.Locale; @@ -199,6 +201,12 @@ public void setLinkPreview(@NonNull RequestManager requestManager, @NonNull Link HttpUrl url = HttpUrl.parse(linkPreview.getUrl()); if (url != null) { domain = url.topPrivateDomain(); + if (domain != null) { + String unicodeDomain = IDN.toUnicode(domain); + if (LinkUtil.isLegalUrl(unicodeDomain)) { + domain = unicodeDomain; + } + } } }