diff --git a/app/src/main/java/org/thoughtcrime/securesms/components/LinkPreviewView.java b/app/src/main/java/org/thoughtcrime/securesms/components/LinkPreviewView.java index 827cbb9eabc..6c7315ec25d 100644 --- a/app/src/main/java/org/thoughtcrime/securesms/components/LinkPreviewView.java +++ b/app/src/main/java/org/thoughtcrime/securesms/components/LinkPreviewView.java @@ -30,8 +30,10 @@ import org.thoughtcrime.securesms.mms.ImageSlide; import org.thoughtcrime.securesms.mms.SlidesClickedListener; import org.signal.core.util.Util; +import org.thoughtcrime.securesms.util.LinkUtil; import org.thoughtcrime.securesms.util.ViewUtil; +import java.net.IDN; import java.text.DateFormat; import java.text.SimpleDateFormat; import java.util.Locale; @@ -199,6 +201,12 @@ public void setLinkPreview(@NonNull RequestManager requestManager, @NonNull Link HttpUrl url = HttpUrl.parse(linkPreview.getUrl()); if (url != null) { domain = url.topPrivateDomain(); + if (domain != null) { + String unicodeDomain = IDN.toUnicode(domain); + if (LinkUtil.isLegalUrl(unicodeDomain)) { + domain = unicodeDomain; + } + } } } diff --git a/app/src/main/java/org/thoughtcrime/securesms/components/emoji/EmojiEditText.java b/app/src/main/java/org/thoughtcrime/securesms/components/emoji/EmojiEditText.java index 6fd9023e011..1c74cd1b338 100644 --- a/app/src/main/java/org/thoughtcrime/securesms/components/emoji/EmojiEditText.java +++ b/app/src/main/java/org/thoughtcrime/securesms/components/emoji/EmojiEditText.java @@ -9,6 +9,9 @@ import android.text.InputFilter; import android.text.TextUtils; import android.util.AttributeSet; +import android.view.inputmethod.EditorInfo; +import android.view.inputmethod.InputConnection; +import android.view.inputmethod.InputConnectionWrapper; import androidx.annotation.NonNull; import androidx.annotation.Nullable; @@ -18,6 +21,7 @@ import org.thoughtcrime.securesms.components.emoji.EmojiProvider.EmojiDrawable; import org.thoughtcrime.securesms.keyvalue.SignalStore; import 
org.thoughtcrime.securesms.util.EditTextExtensionsKt;
+import org.thoughtcrime.securesms.util.LinkUtil;
 import org.thoughtcrime.securesms.util.ServiceUtil;
 import org.thoughtcrime.securesms.util.TextSecurePreferences;
 import org.signal.core.util.Util;
@@ -61,6 +65,25 @@ public EmojiEditText(Context context, AttributeSet attrs, int defStyleAttr) {
     }
   }
 
+  /**
+   * Wraps the platform input connection so that a committed text consisting of one legal URL
+   * is rewritten to its display form (see {@link LinkUtil#toDisplayUrl(String)}).
+   */
+  @Override
+  public InputConnection onCreateInputConnection(EditorInfo outAttrs) {
+    InputConnection base = super.onCreateInputConnection(outAttrs);
+    if (base == null) return null;
+    return new InputConnectionWrapper(base, true) {
+      @Override
+      public boolean commitText(CharSequence text, int newCursorPosition) {
+        // Only substitute when prettifying actually changed something; otherwise the original
+        // CharSequence is handed to the wrapped connection untouched.
+        String display = prettifyUrlOrNull(text);
+        return super.commitText(display != null ? display : text, newCursorPosition);
+      }
+    };
+  }
+
   public void insertEmoji(String emoji) {
     final int start = getSelectionStart();
     final int end = getSelectionEnd();
@@ -121,6 +144,11 @@ public boolean onTextContextMenuItem(int id) {
       if (TextUtils.equals(Util.COPY_LABEL, label) && shouldPersistSignalStylingWhenPasting()) {
         return super.onTextContextMenuItem(id);
       } else {
+        String display = prettifyUrlOrNull(getTextFromClipData(clipData));
+        if (display != null) {
+          pasteUrlDisplay(display);
+          return true;
+        }
         return super.onTextContextMenuItem(android.R.id.pasteAsPlainText);
       }
     }
@@ -140,6 +168,43 @@ public boolean onTextContextMenuItem(int id) {
     return super.onTextContextMenuItem(id);
   }
 
+  /**
+   * Replaces the current selection (or inserts at the cursor) with {@code display} and moves
+   * the cursor to the end of the inserted text.
+   */
+  private void pasteUrlDisplay(@NonNull String display) {
+    if (getText() == null) return;
+    int start = Math.max(0, getSelectionStart());
+    int end = Math.max(0, getSelectionEnd());
+    if (start > end) { int tmp = start; start = end; end = tmp; }
+    int lengthBefore = getText().length();
+    getText().replace(start, end, display);
+    // An InputFilter (e.g. a length limit) may shorten the insertion, so derive the cursor
+    // position from what actually landed in the buffer instead of display.length().
+    int inserted = getText().length() - (lengthBefore - (end - start));
+    setSelection(Math.max(0, Math.min(start + inserted, getText().length())));
+  }
+
+  /**
+   * Returns {@code text} with its URL rewritten to display form, or {@code null} when
+   * {@code text} is null, is not a single legal URL (ignoring surrounding whitespace), or is
+   * already in display form. Leading and trailing whitespace of {@code text} is kept as-is.
+   */
+  private static @Nullable String prettifyUrlOrNull(@Nullable CharSequence text) {
+    if (text == null) return null;
+
+    String raw = text.toString();
+    String trimmed = raw.trim();
+    if (!LinkUtil.isLegalUrl(trimmed)) return null;
+
+    String display = LinkUtil.toDisplayUrl(trimmed);
+    if (display.equals(trimmed)) return null;
+
+    // trim() only strips from the ends, so the first occurrence of 'trimmed' is where it began.
+    int urlStart = raw.indexOf(trimmed);
+    return raw.substring(0, urlStart) + display + raw.substring(urlStart + trimmed.length());
+  }
+
   private @Nullable CharSequence getTextFromClipData(@Nullable ClipData data) {
     if (data != null && data.getItemCount() > 0) {
       return data.getItemAt(0).coerceToText(getContext());
diff --git a/app/src/main/java/org/thoughtcrime/securesms/sharing/v2/ShareRepository.kt b/app/src/main/java/org/thoughtcrime/securesms/sharing/v2/ShareRepository.kt
index 6a382062a54..8e7a6c7a4b4 100644
--- a/app/src/main/java/org/thoughtcrime/securesms/sharing/v2/ShareRepository.kt
+++ b/app/src/main/java/org/thoughtcrime/securesms/sharing/v2/ShareRepository.kt
@@ -10,6 +10,7 @@ import io.reactivex.rxjava3.schedulers.Schedulers
 import org.signal.core.models.media.Media
 import org.signal.core.util.logging.Log
 import org.thoughtcrime.securesms.providers.BlobProvider
+import org.thoughtcrime.securesms.util.LinkUtil
 import org.thoughtcrime.securesms.util.MediaUtil
 import org.thoughtcrime.securesms.util.RemoteConfig
 import org.thoughtcrime.securesms.util.UriUtil
@@ -24,7 +25,7 @@ class ShareRepository(context: Context) {
     return when (unresolvedShareData) {
       is UnresolvedShareData.ExternalMultiShare -> Single.fromCallable { resolve(unresolvedShareData) }
       is UnresolvedShareData.ExternalSingleShare -> Single.fromCallable { resolve(unresolvedShareData) }
-      is UnresolvedShareData.ExternalPrimitiveShare -> Single.just(ResolvedShareData.Primitive(unresolvedShareData.text))
+      is UnresolvedShareData.ExternalPrimitiveShare -> Single.just(ResolvedShareData.Primitive(prettifyIfUrl(unresolvedShareData.text)))
     }.subscribeOn(Schedulers.io())
   }
 
@@ -130,6 +131,23 @@ class ShareRepository(context: Context) {
   companion object {
     private val TAG = Log.tag(ShareRepository::class.java)
 
+    /**
+     * Rewrites [text] to its display form when it is a single legal URL (ignoring surrounding
+     * whitespace); any other text, including its whitespace, is returned untouched.
+     */
+    private fun prettifyIfUrl(text: CharSequence): CharSequence {
+      val raw = text.toString()
+      val trimmed = raw.trim()
+      if (!LinkUtil.isLegalUrl(trimmed)) return text
+
+      val display = LinkUtil.toDisplayUrl(trimmed)
+      if (display == trimmed) return text
+
+      // Splice the display URL back in place so leading/trailing whitespace survives.
+      val urlStart = raw.indexOf(trimmed)
+      return raw.replaceRange(urlStart, urlStart + trimmed.length, display)
+    
} + private fun getMimeType(context: Context, uri: Uri, mimeType: String?, fileExtension: String? = null): String { var updatedMimeType = MediaUtil.getMimeType(context, uri, fileExtension) if (updatedMimeType == null) { diff --git a/app/src/main/java/org/thoughtcrime/securesms/util/LinkUtil.kt b/app/src/main/java/org/thoughtcrime/securesms/util/LinkUtil.kt index 142af786a94..078c417efab 100644 --- a/app/src/main/java/org/thoughtcrime/securesms/util/LinkUtil.kt +++ b/app/src/main/java/org/thoughtcrime/securesms/util/LinkUtil.kt @@ -2,8 +2,13 @@ package org.thoughtcrime.securesms.util import okhttp3.HttpUrl.Companion.toHttpUrlOrNull import org.thoughtcrime.securesms.stickers.StickerUrl +import java.io.ByteArrayOutputStream +import java.net.IDN import java.net.URI import java.net.URISyntaxException +import java.nio.ByteBuffer +import java.nio.charset.CharacterCodingException +import java.nio.charset.CodingErrorAction import java.util.Objects import java.util.regex.Pattern @@ -12,11 +17,8 @@ import java.util.regex.Pattern */ object LinkUtil { private val DOMAIN_PATTERN = Pattern.compile("^(https?://)?([^/]+).*$") - private val ALL_ASCII_PATTERN = Pattern.compile("^[\\x00-\\x7F]*$") - private val ALL_NON_ASCII_PATTERN = Pattern.compile("^[^\\x00-\\x7F]*$") private val ILLEGAL_CHARACTERS_PATTERN = Pattern.compile("[\u202C\u202D\u202E\u2500-\u25FF]") private val ILLEGAL_PERIODS_PATTERN = Pattern.compile("(\\.{2,}|…)") - private val INVALID_DOMAINS = listOf("example", "example\\.com", "example\\.net", "example\\.org", "i2p", "invalid", "localhost", "onion", "test") private val INVALID_DOMAINS_REGEX: Regex = Regex("^(.+\\.)?(${INVALID_DOMAINS.joinToString("|")})\\.?\$") @@ -84,13 +86,178 @@ object LinkUtil { return LegalCharactersResult(false) } - val cleanedDomain = domain.replace("\\.".toRegex(), "") return LegalCharactersResult( - isLegal = ALL_ASCII_PATTERN.matcher(cleanedDomain).matches() || ALL_NON_ASCII_PATTERN.matcher(cleanedDomain).matches(), + isLegal = 
!mixesScripts(domain),
       domain = domain
     )
   }
 
+  /**
+   * Reports whether the letters of [str] are drawn from more than one Unicode script.
+   *
+   * Only code points for which [Character.isLetter] holds are inspected, and letters whose
+   * script is COMMON or INHERITED are skipped. The first remaining letter fixes the expected
+   * script; any later letter from a different script makes the result true. This is the
+   * homograph check for domain names: "grå" (all Latin, including extended Latin) passes,
+   * while a domain mixing e.g. Cyrillic and Latin letters does not.
+   */
+  private fun mixesScripts(str: String): Boolean {
+    var expected: Character.UnicodeScript? = null
+    var offset = 0
+
+    while (offset < str.length) {
+      val codePoint = str.codePointAt(offset)
+      // Advance by code point, not by char, so surrogate pairs are consumed as one unit.
+      offset += Character.charCount(codePoint)
+
+      if (!Character.isLetter(codePoint)) continue
+
+      val script = Character.UnicodeScript.of(codePoint)
+      if (script == Character.UnicodeScript.COMMON || script == Character.UnicodeScript.INHERITED) continue
+
+      if (expected == null) {
+        expected = script
+      } else if (expected != script) {
+        return true
+      }
+    }
+
+    return false
+  }
+
+  /**
+   * Converts a URL to a human-readable display form:
+   * 1. ACE/punycode domain labels are decoded to Unicode when the decoded domain passes [isLegalUrl].
+   * 2. Percent-encoded path bytes are decoded when they represent ASCII letters, ASCII digits,
+   *    hyphens, or sequences of UTF-8 bytes that decode to Unicode letters or digits.
+   *    All other percent-encoded bytes (spaces, slashes, control chars, …) are left as-is.
+   */
+  @JvmStatic
+  fun toDisplayUrl(url: String): String {
+    try {
+      val parsed = URI(url)
+      // Without a parsed host there is nothing to prettify; hand the input back.
+      val asciiHost: String = parsed.host ?: return url
+
+      // Show the Unicode host only when it passes the legality check; otherwise keep the original host.
+      val decodedHost = IDN.toUnicode(asciiHost)
+      val hostToShow = if (isLegalUrl(decodedHost)) decodedHost else asciiHost
+      val pathToShow = decodeUrlSafeChars(parsed.rawPath ?: "")
+
+      // Reassemble from the raw components; only host and path differ from the input.
+      val out = StringBuilder()
+      parsed.scheme?.let { out.append(it).append("://") }
+      parsed.rawUserInfo?.let { out.append(it).append('@') }
+      out.append(hostToShow)
+      if (parsed.port != -1) out.append(':').append(parsed.port)
+      out.append(pathToShow)
+      parsed.rawQuery?.let { out.append('?').append(it) }
+      parsed.rawFragment?.let { out.append('#').append(it) }
+      return out.toString()
+    } catch (e: Exception) {
+      // Best effort: any failure (e.g. a URI syntax error) falls back to the input.
+      return url
+    }
+  }
+
+  /**
+   * Decodes percent-encoded byte sequences that represent ASCII letters, ASCII digits, hyphens,
+   * or multi-byte UTF-8 sequences whose decoded Unicode code point is a letter or digit.
+   * All other percent-encoded sequences are left unchanged.
+   *
+   * If fully decoding all percent-encoded bytes would not yield valid UTF-8, the string is
+   * returned unchanged — partial decoding would produce misleading output (e.g. a bare lead
+   * byte next to a decoded ASCII character that happened to share a code unit with a
+   * continuation byte).
+   */
+  private fun decodeUrlSafeChars(encoded: String): String {
+    if (!encoded.contains('%')) return encoded
+    if (!isFullyDecodedUtf8Valid(encoded)) return encoded
+    val sb = StringBuilder(encoded.length)
+    var i = 0
+    while (i < encoded.length) {
+      val c = encoded[i]
+      // A '%' that is not followed by two ASCII hex digits is not an escape; copy it literally.
+      val firstByte = if (c == '%') hexByteAt(encoded, i + 1) else null
+      if (firstByte == null) {
+        sb.append(c)
+        i++
+        continue
+      }
+      val cpByteCount = when {
+        (firstByte and 0x80) == 0 -> 1 // 0xxxxxxx ASCII
+        (firstByte and 0xE0) == 0xC0 -> 2 // 110xxxxx 2-byte UTF-8
+        (firstByte and 0xF0) == 0xE0 -> 3 // 1110xxxx 3-byte UTF-8
+        (firstByte and 0xF8) == 0xF0 -> 4 // 11110xxx 4-byte UTF-8
+        else -> 0 // continuation or invalid lead byte
+      }
+      if (cpByteCount <= 0) {
+        sb.append(encoded, i, i + 3)
+        i += 3
+        continue
+      }
+      // Collect cpByteCount consecutive %XX tokens; [i, j) is the span consumed so far.
+      val rawBytes = ArrayList<Byte>(cpByteCount)
+      var j = i
+      for (k in 0 until cpByteCount) {
+        val b = (if (j < encoded.length && encoded[j] == '%') hexByteAt(encoded, j + 1) else null) ?: break
+        if (k > 0 && (b and 0xC0) != 0x80) break // must be continuation byte
+        rawBytes.add(b.toByte())
+        j += 3
+      }
+      if (rawBytes.size != cpByteCount) {
+        // Could not assemble a complete code point — emit only the first %XX raw.
+        sb.append(encoded, i, i + 3)
+        i += 3
+        continue
+      }
+      val cp = String(rawBytes.toByteArray(), Charsets.UTF_8).codePointAt(0)
+      if (cp != 0xFFFD && (Character.isLetter(cp) || Character.isDigit(cp) || cp == '-'.code)) {
+        sb.appendCodePoint(cp)
+      } else {
+        // Not a letter, digit or hyphen: keep the original escape text for the whole sequence.
+        sb.append(encoded, i, j)
+      }
+      i = j
+    }
+    return sb.toString()
+  }
+
+  /**
+   * Returns true if decoding every percent-encoded byte sequence in [encoded] would yield a
+   * byte stream that is valid UTF-8. Literal (non-encoded) characters contribute their own
+   * UTF-8 encoding. A '%' that is not followed by two ASCII hex digits (including signed
+   * forms such as "%-1") is treated as a literal '%' character.
+   */
+  private fun isFullyDecodedUtf8Valid(encoded: String): Boolean {
+    val buf = ByteArrayOutputStream(encoded.length)
+    var i = 0
+    while (i < encoded.length) {
+      val escaped = if (encoded[i] == '%') hexByteAt(encoded, i + 1) else null
+      if (escaped != null) {
+        buf.write(escaped)
+        i += 3
+        continue
+      }
+      val cp = encoded.codePointAt(i)
+      buf.write(String(Character.toChars(cp)).toByteArray(Charsets.UTF_8))
+      i += Character.charCount(cp)
+    }
+    return try {
+      // REPORT makes the decoder throw on malformed input instead of substituting U+FFFD.
+      Charsets.UTF_8.newDecoder()
+        .onMalformedInput(CodingErrorAction.REPORT)
+        .onUnmappableCharacter(CodingErrorAction.REPORT)
+        .decode(ByteBuffer.wrap(buf.toByteArray()))
+      true
+    } catch (_: CharacterCodingException) {
+      false
+    }
+  }
+
+  /**
+   * Reads the two characters at [index] and [index] + 1 of [s] as one hex-encoded byte.
+   * Returns the value 0..255, or null when the string is too short or either character is
+   * not an ASCII hex digit (so sign characters are never accepted).
+   */
+  private fun hexByteAt(s: String, index: Int): Int? {
+    if (index + 1 >= s.length) return null
+    val hi = hexDigit(s[index])
+    val lo = hexDigit(s[index + 1])
+    return if (hi < 0 || lo < 0) null else (hi shl 4) or lo
+  }
+
+  /** Value of the ASCII hex digit [c], or -1 when [c] is not one. */
+  private fun hexDigit(c: Char): Int = when (c) {
+    in '0'..'9' -> c - '0'
+    in 'a'..'f' -> c - 'a' + 10
+    in 'A'..'F' -> c - 'A' + 10
+    else -> -1
+  }
+
   @JvmStatic
   private fun isValidURI(linkUri: String?): Boolean {
     return if (linkUri == null) {
diff --git a/app/src/test/java/org/thoughtcrime/securesms/util/LinkUtilTest_isLegal.java b/app/src/test/java/org/thoughtcrime/securesms/util/LinkUtilTest_isLegal.java
index e1b30d5b6a7..45e4a48c1dc 100644
--- a/app/src/test/java/org/thoughtcrime/securesms/util/LinkUtilTest_isLegal.java
+++ b/app/src/test/java/org/thoughtcrime/securesms/util/LinkUtilTest_isLegal.java
@@ -29,7 +29,7 @@ public static Collection data() {
       { "https://abcdefg.i2p", true 
}, { "http://кц.com", false }, { "кц.com", false }, - { "http://asĸ.com", false }, + { "http://asĸ.com", true }, // ĸ (U+0138) is Unicode script LATIN { "http://foo.кц.рф", false }, { "кц.рф\u202C", false }, { "кц.рф\u202D", false }, @@ -47,6 +47,12 @@ public static Collection data() { { "localhost", true }, { "https://localhost", true }, { "cool.test", true }, + { "grå.org", true }, // å is Latin script + { "münchen.de", true }, // ü is Latin script + { "慕田峪长城.网址", true }, // Great Wall site + // Мышкин is the idiot in Dostoyevsky's book. + { "Мышкин.рф", true }, // Cyrillic к U+043A + { "Мышκин.рф", false }, // Greek κ U+03BA { "https://github.com/signalapp/Signal-Android/compare/v6.23.2...v6.23.3", true } }); } diff --git a/app/src/test/java/org/thoughtcrime/securesms/util/LinkUtilTest_isValidPreviewUrl.kt b/app/src/test/java/org/thoughtcrime/securesms/util/LinkUtilTest_isValidPreviewUrl.kt index fd5dd777fb7..e2bdbc25be0 100644 --- a/app/src/test/java/org/thoughtcrime/securesms/util/LinkUtilTest_isValidPreviewUrl.kt +++ b/app/src/test/java/org/thoughtcrime/securesms/util/LinkUtilTest_isValidPreviewUrl.kt @@ -52,7 +52,10 @@ class LinkUtilTest_isValidPreviewUrl(private val input: String, private val outp arrayOf("https://cool.invalid.com", true), arrayOf("https://cool.localhost.signal.org", true), arrayOf("https://cool.test.blarg.gov", true), - arrayOf("https://github.com/signalapp/Signal-Android/compare/v6.23.2...v6.23.3", true) + arrayOf("https://github.com/signalapp/Signal-Android/compare/v6.23.2...v6.23.3", true), + arrayOf("https://grå.org", true), + arrayOf("https://grå.org/some/path", true), + arrayOf("http://grå.org", false) ) } }