Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,10 @@
import org.thoughtcrime.securesms.mms.ImageSlide;
import org.thoughtcrime.securesms.mms.SlidesClickedListener;
import org.signal.core.util.Util;
import org.thoughtcrime.securesms.util.LinkUtil;
import org.thoughtcrime.securesms.util.ViewUtil;

import java.net.IDN;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Locale;
Expand Down Expand Up @@ -199,6 +201,12 @@ public void setLinkPreview(@NonNull RequestManager requestManager, @NonNull Link
HttpUrl url = HttpUrl.parse(linkPreview.getUrl());
if (url != null) {
domain = url.topPrivateDomain();
if (domain != null) {
String unicodeDomain = IDN.toUnicode(domain);
if (LinkUtil.isLegalUrl(unicodeDomain)) {
domain = unicodeDomain;
}
}
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,9 @@
import android.text.InputFilter;
import android.text.TextUtils;
import android.util.AttributeSet;
import android.view.inputmethod.EditorInfo;
import android.view.inputmethod.InputConnection;
import android.view.inputmethod.InputConnectionWrapper;

import androidx.annotation.NonNull;
import androidx.annotation.Nullable;
Expand All @@ -18,6 +21,7 @@
import org.thoughtcrime.securesms.components.emoji.EmojiProvider.EmojiDrawable;
import org.thoughtcrime.securesms.keyvalue.SignalStore;
import org.thoughtcrime.securesms.util.EditTextExtensionsKt;
import org.thoughtcrime.securesms.util.LinkUtil;
import org.thoughtcrime.securesms.util.ServiceUtil;
import org.thoughtcrime.securesms.util.TextSecurePreferences;
import org.signal.core.util.Util;
Expand Down Expand Up @@ -61,6 +65,27 @@ public EmojiEditText(Context context, AttributeSet attrs, int defStyleAttr) {
}
}

/**
 * Wraps the input connection created by the superclass so that committed text which is,
 * after trimming, a legal URL is committed in its display form (see
 * {@link LinkUtil#toDisplayUrl(String)}) whenever that form differs from the original.
 *
 * Whitespace surrounding the URL in the committed text is preserved, so a trailing space
 * or newline supplied by the keyboard is not lost when the URL is rewritten. Text that is
 * not a legal URL, or whose display form is unchanged, is committed untouched.
 *
 * @param outAttrs passed through to the superclass implementation.
 * @return the wrapping connection, or null when the superclass returns null.
 */
@Override
public InputConnection onCreateInputConnection(EditorInfo outAttrs) {
  InputConnection base = super.onCreateInputConnection(outAttrs);
  if (base == null) return null;
  return new InputConnectionWrapper(base, true) {
    @Override
    public boolean commitText(CharSequence text, int newCursorPosition) {
      if (text != null) {
        String raw     = text.toString();
        String trimmed = raw.trim();
        if (LinkUtil.isLegalUrl(trimmed)) {
          String display = LinkUtil.toDisplayUrl(trimmed);
          if (!display.equals(trimmed)) {
            // trim() only strips from the ends, so the first occurrence of the trimmed
            // text inside the raw text marks where the leading whitespace stops.
            int    urlStart    = raw.indexOf(trimmed);
            String replacement = raw.substring(0, urlStart) + display + raw.substring(urlStart + trimmed.length());
            return super.commitText(replacement, newCursorPosition);
          }
        }
      }
      return super.commitText(text, newCursorPosition);
    }
  };
}

public void insertEmoji(String emoji) {
final int start = getSelectionStart();
final int end = getSelectionEnd();
Expand Down Expand Up @@ -121,6 +146,17 @@ public boolean onTextContextMenuItem(int id) {
if (TextUtils.equals(Util.COPY_LABEL, label) && shouldPersistSignalStylingWhenPasting()) {
return super.onTextContextMenuItem(id);
} else {
CharSequence pasteText = getTextFromClipData(clipData);
if (pasteText != null) {
String trimmed = pasteText.toString().trim();
if (LinkUtil.isLegalUrl(trimmed)) {
String display = LinkUtil.toDisplayUrl(trimmed);
if (!display.equals(trimmed)) {
pasteUrlDisplay(display);
return true;
}
}
}
return super.onTextContextMenuItem(android.R.id.pasteAsPlainText);
}
}
Expand All @@ -140,6 +176,15 @@ public boolean onTextContextMenuItem(int id) {
return super.onTextContextMenuItem(id);
}

/**
 * Replaces the current selection (or inserts at the cursor when nothing is selected)
 * with {@code display} and moves the cursor to the end of the inserted text.
 * Does nothing when the view has no text object.
 *
 * @param display the display form of the URL to insert.
 */
private void pasteUrlDisplay(@NonNull String display) {
  if (getText() == null) return;
  // Selection indices are negative when there is no selection; treat that as position 0.
  int start = Math.max(0, getSelectionStart());
  int end   = Math.max(0, getSelectionEnd());
  if (start > end) { int tmp = start; start = end; end = tmp; }
  getText().replace(start, end, display);
  // Input filters attached to the text may shorten or reject the insertion, so the
  // intended cursor position is clamped to the actual length to keep setSelection()
  // from throwing on an out-of-range index.
  setSelection(Math.min(start + display.length(), getText().length()));
}

private @Nullable CharSequence getTextFromClipData(@Nullable ClipData data) {
if (data != null && data.getItemCount() > 0) {
return data.getItemAt(0).coerceToText(getContext());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import io.reactivex.rxjava3.schedulers.Schedulers
import org.signal.core.models.media.Media
import org.signal.core.util.logging.Log
import org.thoughtcrime.securesms.providers.BlobProvider
import org.thoughtcrime.securesms.util.LinkUtil
import org.thoughtcrime.securesms.util.MediaUtil
import org.thoughtcrime.securesms.util.RemoteConfig
import org.thoughtcrime.securesms.util.UriUtil
Expand All @@ -24,7 +25,7 @@ class ShareRepository(context: Context) {
return when (unresolvedShareData) {
is UnresolvedShareData.ExternalMultiShare -> Single.fromCallable { resolve(unresolvedShareData) }
is UnresolvedShareData.ExternalSingleShare -> Single.fromCallable { resolve(unresolvedShareData) }
is UnresolvedShareData.ExternalPrimitiveShare -> Single.just(ResolvedShareData.Primitive(unresolvedShareData.text))
is UnresolvedShareData.ExternalPrimitiveShare -> Single.just(ResolvedShareData.Primitive(prettifyIfUrl(unresolvedShareData.text)))
}.subscribeOn(Schedulers.io())
}

Expand Down Expand Up @@ -130,6 +131,11 @@ class ShareRepository(context: Context) {
companion object {
private val TAG = Log.tag(ShareRepository::class.java)

/**
 * Returns the display form of [text] when its trimmed content is a legal URL;
 * otherwise returns [text] itself, untouched.
 */
private fun prettifyIfUrl(text: CharSequence): CharSequence {
  val candidate = text.toString().trim()
  if (!LinkUtil.isLegalUrl(candidate)) {
    return text
  }
  return LinkUtil.toDisplayUrl(candidate)
}

private fun getMimeType(context: Context, uri: Uri, mimeType: String?, fileExtension: String? = null): String {
var updatedMimeType = MediaUtil.getMimeType(context, uri, fileExtension)
if (updatedMimeType == null) {
Expand Down
177 changes: 172 additions & 5 deletions app/src/main/java/org/thoughtcrime/securesms/util/LinkUtil.kt
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,13 @@ package org.thoughtcrime.securesms.util

import okhttp3.HttpUrl.Companion.toHttpUrlOrNull
import org.thoughtcrime.securesms.stickers.StickerUrl
import java.io.ByteArrayOutputStream
import java.net.IDN
import java.net.URI
import java.net.URISyntaxException
import java.nio.ByteBuffer
import java.nio.charset.CharacterCodingException
import java.nio.charset.CodingErrorAction
import java.util.Objects
import java.util.regex.Pattern

Expand All @@ -12,11 +17,8 @@ import java.util.regex.Pattern
*/
object LinkUtil {
private val DOMAIN_PATTERN = Pattern.compile("^(https?://)?([^/]+).*$")
private val ALL_ASCII_PATTERN = Pattern.compile("^[\\x00-\\x7F]*$")
private val ALL_NON_ASCII_PATTERN = Pattern.compile("^[^\\x00-\\x7F]*$")
private val ILLEGAL_CHARACTERS_PATTERN = Pattern.compile("[\u202C\u202D\u202E\u2500-\u25FF]")
private val ILLEGAL_PERIODS_PATTERN = Pattern.compile("(\\.{2,}|…)")

private val INVALID_DOMAINS = listOf("example", "example\\.com", "example\\.net", "example\\.org", "i2p", "invalid", "localhost", "onion", "test")
private val INVALID_DOMAINS_REGEX: Regex = Regex("^(.+\\.)?(${INVALID_DOMAINS.joinToString("|")})\\.?\$")

Expand Down Expand Up @@ -84,13 +86,178 @@ object LinkUtil {
return LegalCharactersResult(false)
}

val cleanedDomain = domain.replace("\\.".toRegex(), "")
return LegalCharactersResult(
isLegal = ALL_ASCII_PATTERN.matcher(cleanedDomain).matches() || ALL_NON_ASCII_PATTERN.matcher(cleanedDomain).matches(),
isLegal = !mixesScripts(domain),
domain = domain
)
}

/**
 * Reports whether the letters in [str] come from more than one Unicode script.
 *
 * Only code points for which [Character.isLetter] is true are inspected, and letters
 * whose script is COMMON or INHERITED are skipped. The first remaining letter fixes the
 * expected script; any later letter from a different script makes the result true.
 * A string with no such letters (empty, digits, punctuation only) yields false.
 *
 * This serves as a homograph check for domain names: "grå" (all Latin) passes, while a
 * label mixing Cyrillic and Greek or Latin letters is reported as mixed.
 */
private fun mixesScripts(str: String): Boolean {
  var expected: Character.UnicodeScript? = null
  var index = 0
  while (index < str.length) {
    val codePoint = str.codePointAt(index)
    // Advance up front so the skip paths below can simply continue.
    index += Character.charCount(codePoint)

    if (!Character.isLetter(codePoint)) continue

    val script = Character.UnicodeScript.of(codePoint)
    if (script == Character.UnicodeScript.COMMON || script == Character.UnicodeScript.INHERITED) continue

    if (expected == null) {
      expected = script
    } else if (expected != script) {
      return true
    }
  }
  return false
}

/**
 * Produces a human-readable rendering of [url].
 *
 * - The host is passed through [IDN.toUnicode]; the decoded host is used only when it
 *   passes [isLegalUrl], otherwise the original host is kept.
 * - The raw path is passed through [decodeUrlSafeChars], which decodes only the
 *   percent-escapes that stand for letters, digits or hyphens.
 * - Scheme, raw user info, port, raw query and raw fragment are copied through unchanged.
 *
 * The input is returned as-is when it has no parseable host or when any exception is
 * raised while parsing or rebuilding it.
 */
@JvmStatic
fun toDisplayUrl(url: String): String {
  try {
    val parsed = URI(url)
    val asciiHost = parsed.host
    if (asciiHost == null) {
      return url
    }

    val decodedHost = IDN.toUnicode(asciiHost)
    val hostToShow = if (isLegalUrl(decodedHost)) decodedHost else asciiHost
    val pathToShow = decodeUrlSafeChars(parsed.rawPath ?: "")

    val out = StringBuilder()
    parsed.scheme?.let { out.append(it).append("://") }
    parsed.rawUserInfo?.let { out.append(it).append("@") }
    out.append(hostToShow)
    if (parsed.port != -1) {
      out.append(":").append(parsed.port)
    }
    out.append(pathToShow)
    parsed.rawQuery?.let { out.append("?").append(it) }
    parsed.rawFragment?.let { out.append("#").append(it) }
    return out.toString()
  } catch (e: Exception) {
    return url
  }
}

/**
 * Decodes percent-encoded byte sequences that represent ASCII letters, ASCII digits, hyphens,
 * or multi-byte UTF-8 sequences whose decoded Unicode code point is a letter or digit.
 * All other percent-encoded sequences are left unchanged.
 *
 * A percent escape is recognised only as '%' followed by exactly two ASCII hex digits
 * (0-9, a-f, A-F). Anything else, including signed forms such as "%+A" or non-ASCII digit
 * characters, is copied through as literal text.
 *
 * If fully decoding all percent-encoded bytes would not yield valid UTF-8 (as reported by
 * [isFullyDecodedUtf8Valid]), the string is returned unchanged, because partial decoding
 * would produce misleading output.
 *
 * @param encoded the raw (still percent-encoded) text, e.g. a URI's raw path.
 * @return [encoded] with the readable escapes replaced by the characters they encode.
 */
private fun decodeUrlSafeChars(encoded: String): String {
  if (!encoded.contains('%')) return encoded
  if (!isFullyDecodedUtf8Valid(encoded)) return encoded

  // Value of a single ASCII hex digit, or -1 for any other character.
  fun hexDigit(ch: Char): Int = when (ch) {
    in '0'..'9' -> ch - '0'
    in 'a'..'f' -> ch - 'a' + 10
    in 'A'..'F' -> ch - 'A' + 10
    else -> -1
  }

  // Byte value (0..255) of the "%XX" escape starting at [index], or null when there is no
  // complete, well-formed escape at that position.
  fun octetAt(index: Int): Int? {
    if (index + 2 >= encoded.length || encoded[index] != '%') return null
    val high = hexDigit(encoded[index + 1])
    val low = hexDigit(encoded[index + 2])
    return if (high < 0 || low < 0) null else (high shl 4) or low
  }

  val sb = StringBuilder(encoded.length)
  var i = 0
  while (i < encoded.length) {
    val firstByte = octetAt(i)
    if (firstByte == null) {
      // Not an escape: copy the character through verbatim.
      sb.append(encoded[i])
      i++
      continue
    }

    // Number of bytes the UTF-8 lead byte announces; 0 for a continuation or invalid lead byte.
    val cpByteCount = when {
      firstByte and 0x80 == 0 -> 1 // 0xxxxxxx ASCII
      firstByte and 0xE0 == 0xC0 -> 2 // 110xxxxx 2-byte UTF-8
      firstByte and 0xF0 == 0xE0 -> 3 // 1110xxxx 3-byte UTF-8
      firstByte and 0xF8 == 0xF0 -> 4 // 11110xxx 4-byte UTF-8
      else -> 0
    }

    // Collect cpByteCount consecutive escapes; every byte after the first must be a
    // UTF-8 continuation byte (10xxxxxx).
    val bytes = ByteArray(cpByteCount)
    var j = i
    var collected = 0
    while (collected < cpByteCount) {
      val octet = octetAt(j) ?: break
      if (collected > 0 && (octet and 0xC0) != 0x80) break
      bytes[collected] = octet.toByte()
      collected++
      j += 3
    }

    if (cpByteCount == 0 || collected < cpByteCount) {
      // Could not assemble a complete code point: emit only the first %XX raw.
      sb.append(encoded, i, i + 3)
      i += 3
      continue
    }

    val cp = String(bytes, Charsets.UTF_8).codePointAt(0)
    if (cp != 0xFFFD && (Character.isLetter(cp) || Character.isDigit(cp) || cp == '-'.code)) {
      sb.appendCodePoint(cp)
    } else {
      // Keep the escapes exactly as written for anything that is not a letter, digit or hyphen.
      sb.append(encoded, i, j)
    }
    i = j
  }
  return sb.toString()
}

/**
 * Returns true if decoding every percent-encoded byte sequence in [encoded] would yield a
 * byte stream that is valid UTF-8.
 *
 * A percent escape is '%' followed by exactly two ASCII hex digits (0-9, a-f, A-F). Any
 * other '%' (too short, signed forms such as "%-1", non-ASCII digit characters) is treated
 * as a literal '%' character. Literal characters contribute their own UTF-8 encoding.
 *
 * @param encoded text that may contain percent escapes.
 * @return false when the assembled bytes are rejected by a strict UTF-8 decoder, true otherwise.
 */
private fun isFullyDecodedUtf8Valid(encoded: String): Boolean {
  // Value of a single ASCII hex digit, or -1 for any other character. A lenient integer
  // parser is deliberately avoided here: it would accept a leading sign or non-ASCII digits.
  fun hexDigit(ch: Char): Int = when (ch) {
    in '0'..'9' -> ch - '0'
    in 'a'..'f' -> ch - 'a' + 10
    in 'A'..'F' -> ch - 'A' + 10
    else -> -1
  }

  val buf = ByteArrayOutputStream(encoded.length)
  var i = 0
  while (i < encoded.length) {
    if (encoded[i] == '%' && i + 2 < encoded.length) {
      val high = hexDigit(encoded[i + 1])
      val low = hexDigit(encoded[i + 2])
      if (high >= 0 && low >= 0) {
        buf.write((high shl 4) or low)
        i += 3
        continue
      }
    }
    // Literal character (or a '%' that does not start a well-formed escape).
    val cp = encoded.codePointAt(i)
    buf.write(String(Character.toChars(cp)).toByteArray(Charsets.UTF_8))
    i += Character.charCount(cp)
  }

  return try {
    // REPORT makes the decoder throw instead of substituting U+FFFD for bad input.
    Charsets.UTF_8.newDecoder()
      .onMalformedInput(CodingErrorAction.REPORT)
      .onUnmappableCharacter(CodingErrorAction.REPORT)
      .decode(ByteBuffer.wrap(buf.toByteArray()))
    true
  } catch (ignored: CharacterCodingException) {
    false
  }
}

@JvmStatic
private fun isValidURI(linkUri: String?): Boolean {
return if (linkUri == null) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ public static Collection<Object[]> data() {
{ "https://abcdefg.i2p", true },
{ "http://кц.com", false },
{ "кц.com", false },
{ "http://asĸ.com", false },
{ "http://asĸ.com", true }, // ĸ (U+0138) is Unicode script LATIN
{ "http://foo.кц.рф", false },
{ "кц.рф\u202C", false },
{ "кц.рф\u202D", false },
Expand All @@ -47,6 +47,12 @@ public static Collection<Object[]> data() {
{ "localhost", true },
{ "https://localhost", true },
{ "cool.test", true },
{ "grå.org", true }, // å is Latin script
{ "münchen.de", true }, // ü is Latin script
{ "慕田峪长城.网址", true }, // Great Wall site
// Мышкин (Myshkin) is the title character of Dostoevsky's novel "The Idiot".
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I particularly like this test.

{ "Мышкин.рф", true }, // Cyrillic к U+043A
{ "Мышκин.рф", false }, // Greek κ U+03BA
{ "https://github.com/signalapp/Signal-Android/compare/v6.23.2...v6.23.3", true }
});
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,10 @@ class LinkUtilTest_isValidPreviewUrl(private val input: String, private val outp
arrayOf("https://cool.invalid.com", true),
arrayOf("https://cool.localhost.signal.org", true),
arrayOf("https://cool.test.blarg.gov", true),
arrayOf("https://github.com/signalapp/Signal-Android/compare/v6.23.2...v6.23.3", true)
arrayOf("https://github.com/signalapp/Signal-Android/compare/v6.23.2...v6.23.3", true),
arrayOf("https://grå.org", true),
arrayOf("https://grå.org/some/path", true),
arrayOf("http://grå.org", false)
)
}
}
Expand Down