Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/loose-pears-divide.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@embedly/platforms": minor
---

Revert platform URL matching back to regex-based matchers.
4 changes: 2 additions & 2 deletions packages/platforms/src/CBC.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@ describe("CBC", () => {
});

it("extracts cbc id", () => {
const match = cbc.pattern.exec(
const match = cbc.regex.exec(
"https://www.cbc.ca/news/canada/1.7654321"
);
expect(match?.pathname.groups.cbc_id).toBe("1.7654321");
expect(match?.groups?.cbc_id).toBe("1.7654321");
});
});
17 changes: 6 additions & 11 deletions packages/platforms/src/CBC.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,27 +4,22 @@ import he from "he";
import { CF_CACHE_OPTIONS } from "./constants.ts";
import { type BaseEmbedData, EmbedlyPlatform } from "./Platform.ts";
import { EmbedlyPlatformType } from "./types.ts";
import { signProxyUrl, validatePatternMatch } from "./utils.ts";
import { signProxyUrl, validateRegexMatch } from "./utils.ts";

export class CBC extends EmbedlyPlatform {
readonly color = [215, 36, 42] as const;
readonly emoji = "<:cbc:1409997044495683674>";
readonly pattern = new URLPattern({
hostname: "{*.}?cbc.ca",
pathname: "*/:cbc_id{/}?"
});
readonly regex =
/^(?:https?:\/\/)?(?:[\w-]+\.)*cbc\.ca\/.*(?<cbc_id>\d\.\d+)/;

constructor() {
super(EmbedlyPlatformType.CBC, "cbc.ca");
}

async parsePostId(url: string): Promise<string> {
const match = this.pattern.exec(url);
validatePatternMatch(
match,
"Invalid CBC URL: could not extract ID"
);
const { cbc_id } = match.pathname.groups;
const match = this.regex.exec(url);
validateRegexMatch(match, "Invalid CBC URL: could not extract ID");
const { cbc_id } = match.groups;
return cbc_id;
}

Expand Down
12 changes: 2 additions & 10 deletions packages/platforms/src/Instagram.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,17 +32,9 @@ describe("Instagram", () => {
});

it("extracts shortcode", () => {
const match = instagram.pattern.exec(
const match = instagram.regex.exec(
"https://www.instagram.com/p/CxYz123_Ab/"
);
expect(match?.pathname.groups.ig_shortcode).toBe("CxYz123_Ab");
});

it("extracts shortcode with user prefix", () => {
const match = instagram.pattern.exec(
"https://www.instagram.com/natgeo/reel/CxYz123/"
);
expect(match?.pathname.groups.ig_shortcode).toBe("CxYz123");
expect(match?.pathname.groups.user).toBe("natgeo");
expect(match?.groups?.ig_shortcode).toBe("CxYz123_Ab");
});
});
15 changes: 6 additions & 9 deletions packages/platforms/src/Instagram.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,16 +6,13 @@ import {
EmbedlyPlatform
} from "./Platform.ts";
import { EmbedlyPlatformType } from "./types.ts";
import { validatePatternMatch } from "./utils.ts";
import { validateRegexMatch } from "./utils.ts";

export class Instagram extends EmbedlyPlatform {
readonly color = [225, 48, 108] as const;
readonly emoji = "<:instagram:1386639712013254748>";
readonly pattern = new URLPattern({
hostname: "{*.}?instagram.com",
pathname:
"/{:user/}?(p|share|reels|reel|stories)/:ig_shortcode{/*}?"
});
readonly regex =
/^(?:https?:\/\/)?(?:[\w-]+\.)*instagram\.com\/(?:[A-Za-z0-9_.]+\/)?(p|share|reels|reel|stories)\/(?<ig_shortcode>[A-Za-z0-9-_]+)/;

constructor() {
super(EmbedlyPlatformType.Instagram, "insta");
Expand All @@ -31,12 +28,12 @@ export class Instagram extends EmbedlyPlatform {
});
url = req.url;
}
const match = this.pattern.exec(url);
validatePatternMatch(
const match = this.regex.exec(url);
validateRegexMatch(
match,
"Invalid Instagram URL: could not extract shortcode"
);
const { ig_shortcode } = match.pathname.groups;
const { ig_shortcode } = match.groups;
return ig_shortcode;
}

Expand Down
20 changes: 9 additions & 11 deletions packages/platforms/src/Mastodon.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,34 +7,32 @@ import {
EmbedlyPlatform
} from "./Platform.ts";
import type { EmbedlyPlatformType } from "./types.ts";
import { signProxyUrl, validatePatternMatch } from "./utils.ts";
import { signProxyUrl, validateRegexMatch } from "./utils.ts";

export abstract class EmbedlyMastodon extends EmbedlyPlatform {
abstract readonly base_url: string;

readonly pattern = new URLPattern({
hostname: "{*.}?__PLACEHOLDER__"
});
readonly regex: RegExp;

constructor(
name: EmbedlyPlatformType,
cache_prefix: string,
hostname: string
) {
super(name, cache_prefix);
this.pattern = new URLPattern({
hostname: `{*.}?${hostname}`,
pathname: "/@:username/posts/:status_id{/}?"
});
const escapedHostname = hostname.replaceAll(".", "\\.");
this.regex = new RegExp(
`^(?:https?:\\/\\/)?(?:[\\w-]+\\.)*${escapedHostname}\\/@[^/]+\\/posts\\/(?<status_id>[^/?#]+)`
);
}

async parsePostId(url: string): Promise<string> {
const match = this.pattern.exec(url);
validatePatternMatch(
const match = this.regex.exec(url);
validateRegexMatch(
match,
`Invalid ${this.name} URL: could not extract status ID`
);
return match.pathname.groups.status_id;
return match.groups.status_id;
}

async fetchPost(
Expand Down
8 changes: 2 additions & 6 deletions packages/platforms/src/Platform.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ export interface CloudflareEnv {
export abstract class EmbedlyPlatform {
abstract readonly color: readonly [number, number, number];
abstract readonly emoji: string;
abstract readonly pattern: URLPattern;
abstract readonly regex: RegExp;

public log_messages: EmbedlyPlatformLogMessages;

Expand All @@ -42,11 +42,7 @@ export abstract class EmbedlyPlatform {
}

public matchesUrl(url: string): boolean {
try {
return this.pattern.test(url);
} catch {
return false;
}
return this.regex.test(url);
}

abstract parsePostId(
Expand Down
47 changes: 9 additions & 38 deletions packages/platforms/src/Reddit.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,46 +6,17 @@ import {
EmbedlyPlatform
} from "./Platform.ts";
import { EmbedlyPlatformType } from "./types.ts";
import { validatePatternMatch } from "./utils.ts";
import { validateRegexMatch } from "./utils.ts";

const REDDIT_PATTERN_COMMENTS = new URLPattern({
hostname: "{(www|old).}?reddit.com",
pathname: "/r/:subreddit/comments/:post_id{/*}?"
});

const REDDIT_PATTERN_SHARE = new URLPattern({
hostname: "{(www|old).}?reddit.com",
pathname: "/r/:subreddit/s/:share_id{/}?"
});

const REDDIT_PATTERN_SHORTLINK = new URLPattern({
hostname: "redd.it",
pathname: "/:short_id{/}?"
});

const REDDIT_PATTERN_FOLLOWUP = new URLPattern({
hostname: "{(www|old|m).}?reddit.com",
pathname: "/r/:subreddit/comments/:post_id{/*}?"
});
/**
 * Matches the Reddit URL shapes this platform accepts; assigned to
 * `Reddit.regex`.
 *
 * Requires an explicit `http://` or `https://` scheme, an optional `www.` or
 * `old.` prefix, and then one of:
 * - `reddit.com/r/<subreddit>/comments/<id>`, optionally followed by one
 *   extra path segment
 * - `reddit.com/r/<subreddit>/s/<id>`
 * - `redd.it/<id>`
 *
 * An optional trailing slash is consumed. The pattern is anchored at the
 * start only and defines no capture groups.
 *
 * NOTE(review): the optional `www.`/`old.` prefix sits in front of the whole
 * alternation, so `www.redd.it/...` also matches — confirm that is intended.
 */
const REDDIT_REGEX_MAIN =
/^https?:\/\/(?:www\.|old\.)?(?:reddit\.com\/r\/[A-Za-z0-9_]+\/(?:comments\/[A-Za-z0-9]+(?:\/[^/\s]+)?|s\/[A-Za-z0-9]+)|redd\.it\/[A-Za-z0-9]+)\/?/;
/**
 * Extracts the named groups `subreddit` and `post_id` from a
 * `reddit.com/r/<subreddit>/comments/<post_id>` URL. The `Reddit` class runs
 * it against `req.url` rather than against the URL it was originally given.
 *
 * Requires an explicit `http://` or `https://` scheme and accepts an optional
 * `www.`, `old.` or `m.` prefix. Anchored at the start only, so anything
 * after the post id is ignored.
 *
 * NOTE(review): `post_id` only allows lowercase letters and digits, whereas
 * `REDDIT_REGEX_MAIN` also allows uppercase in the same position — confirm
 * the difference is intentional.
 */
const REDDIT_REGEX_FOLLOWUP =
/^https?:\/\/(?:www\.|old\.|m\.)?reddit\.com\/r\/(?<subreddit>\w+)\/comments\/(?<post_id>[a-z0-9]+)/;

export class Reddit extends EmbedlyPlatform {
readonly color = [255, 86, 0] as const;
readonly emoji = "<:reddit:1461320093240655922>";
readonly pattern = REDDIT_PATTERN_COMMENTS;

public matchesUrl(url: string): boolean {
return [
REDDIT_PATTERN_COMMENTS,
REDDIT_PATTERN_SHARE,
REDDIT_PATTERN_SHORTLINK
].some((p) => {
try {
return p.test(url);
} catch {
return false;
}
});
}
readonly regex = REDDIT_REGEX_MAIN;

constructor() {
super(EmbedlyPlatformType.Reddit, "reddit");
Expand All @@ -65,12 +36,12 @@ export class Reddit extends EmbedlyPlatform {
...CF_CACHE_OPTIONS
});
console.log(req);
const match = REDDIT_PATTERN_FOLLOWUP.exec(req.url);
validatePatternMatch(
const match = REDDIT_REGEX_FOLLOWUP.exec(req.url);
validateRegexMatch(
match,
"Invalid Reddit URL: could not extract post ID or subreddit"
);
const { post_id, subreddit } = match.pathname.groups;
const { post_id, subreddit } = match.groups;
return `${subreddit}/${post_id}`;
}

Expand Down
5 changes: 2 additions & 3 deletions packages/platforms/src/Threads.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,10 +26,9 @@ describe("Threads", () => {
});

it("extracts shortcode and username", () => {
const match = threads.pattern.exec(
const match = threads.regex.exec(
"https://www.threads.net/@zuck/post/CxYz123_Ab"
);
expect(match?.pathname.groups.thread_shortcode).toBe("CxYz123_Ab");
expect(match?.pathname.groups.username).toBe("zuck");
expect(match?.groups?.thread_shortcode).toBe("CxYz123_Ab");
});
});
14 changes: 6 additions & 8 deletions packages/platforms/src/Threads.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,30 +5,28 @@ import {
EmbedlyPlatform
} from "./Platform.ts";
import { EmbedlyPlatformType } from "./types.ts";
import { validatePatternMatch } from "./utils.ts";
import { validateRegexMatch } from "./utils.ts";

/**
 * The 64 characters `A`-`Z`, `a`-`z`, `0`-`9`, `-` and `_`, in that order.
 *
 * NOTE(review): presumably the digit table used when `parsePostId` converts
 * `thread_shortcode` into `thread_id`; the conversion body is not fully
 * visible here — confirm.
 */
const alphabet =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";

export class Threads extends EmbedlyPlatform {
readonly color = [0, 0, 0] as const;
readonly emoji = "<:threads:1413343483929956446>";
readonly pattern = new URLPattern({
hostname: "{*.}?threads.net",
pathname: "/@:username/post/:thread_shortcode{/}?"
});
readonly regex =
/^(?:https?:\/\/)?(?:[\w-]+\.)*threads\.net\/@.*\/post\/(?<thread_shortcode>[A-Za-z0-9-_]+)/;

constructor() {
super(EmbedlyPlatformType.Threads, "threads");
}

async parsePostId(url: string): Promise<string> {
const match = this.pattern.exec(url);
validatePatternMatch(
const match = this.regex.exec(url);
validateRegexMatch(
match,
"Invalid Threads URL: could not extract shortcode"
);
const { thread_shortcode } = match.pathname.groups;
const { thread_shortcode } = match.groups;
const thread_id = thread_shortcode
.split("")
.reduce(
Expand Down
14 changes: 5 additions & 9 deletions packages/platforms/src/TikTok.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,16 +26,12 @@ describe("TikTok", () => {
});

it("extracts user and video id from detail pattern", () => {
const detail_pattern = new URLPattern({
hostname: "{(m|www|vm).}?tiktok.com",
pathname: "/:tiktok_user/video/:tiktok_id{/}?"
});
const match = detail_pattern.exec(
const detail_regex =
/https:\/\/(?:m|www|vm)?\.?tiktok\.com\/(?<tiktok_user>@[\w.-]+)\/video\/(?<tiktok_id>\d+)/;
const match = detail_regex.exec(
"https://www.tiktok.com/@cooluser/video/7234567890123456789"
);
expect(match?.pathname.groups.tiktok_user).toBe("@cooluser");
expect(match?.pathname.groups.tiktok_id).toBe(
"7234567890123456789"
);
expect(match?.groups?.tiktok_user).toBe("@cooluser");
expect(match?.groups?.tiktok_id).toBe("7234567890123456789");
});
});
21 changes: 9 additions & 12 deletions packages/platforms/src/TikTok.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,34 +7,31 @@ import {
EmbedlyPlatform
} from "./Platform.ts";
import { EmbedlyPlatformType } from "./types.ts";
import { validatePatternMatch } from "./utils.ts";
import { validateRegexMatch } from "./utils.ts";

const TIKTOK_PATTERN_MAIN = new URLPattern({
hostname: "{*.}?tiktok.com"
});
/**
 * Broad host-level matcher assigned to `TikTok.regex`.
 *
 * Accepts an optional `http://` or `https://` scheme, any number of subdomain
 * labels, then `tiktok.com` followed by either `/` or the end of the input.
 * It defines no capture groups; the user and video id are extracted
 * separately by `TIKTOK_REGEX_DETAIL`.
 *
 * NOTE(review): a host followed directly by `?`, `#` or `:port` (with no
 * `/`) does not match — confirm such URLs never need to be recognized.
 */
const TIKTOK_REGEX_MAIN =
/^(?:https?:\/\/)?(?:[\w-]+\.)*tiktok\.com(?:\/|$)/;

const TIKTOK_PATTERN_DETAIL = new URLPattern({
hostname: "{(m|www|vm).}?tiktok.com",
pathname: "/:tiktok_user/video/:tiktok_id{/}?"
});
/**
 * Extracts `tiktok_user` (including the leading `@`) and `tiktok_id` (digits
 * only) from a canonical video URL. `parsePostId` runs it against the
 * post-redirect `req.url`.
 *
 * Only `https://` is accepted, and the host must be `tiktok.com`, optionally
 * prefixed by exactly one of `m.`, `www.` or `vm.`. The label and its dot are
 * grouped so that they are optional together: when each was optional on its
 * own, hosts such as `wwwtiktok.com` and `.tiktok.com` were accepted.
 */
const TIKTOK_REGEX_DETAIL =
  /^https:\/\/(?:(?:m|www|vm)\.)?tiktok\.com\/(?<tiktok_user>@[\w.-]+)\/video\/(?<tiktok_id>\d+)/;

export class TikTok extends EmbedlyPlatform {
readonly color = [57, 118, 132] as const;
readonly emoji = "<:tiktok:1386641825963708446>";
readonly pattern = TIKTOK_PATTERN_MAIN;
readonly regex = TIKTOK_REGEX_MAIN;

constructor() {
super(EmbedlyPlatformType.TikTok, "tiktok");
}

async parsePostId(url: string): Promise<string> {
const req = await fetch(url, { redirect: "follow" });
const match = TIKTOK_PATTERN_DETAIL.exec(req.url);
validatePatternMatch(
const match = TIKTOK_REGEX_DETAIL.exec(req.url);
validateRegexMatch(
match,
"Invalid TikTok URL: could not extract user/id"
);
const { tiktok_user, tiktok_id } = match.pathname.groups;
const { tiktok_user, tiktok_id } = match.groups;
return `${tiktok_user}/${tiktok_id}`;
}

Expand Down
4 changes: 2 additions & 2 deletions packages/platforms/src/Twitter.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,9 @@ describe("Twitter", () => {
});

it("extracts tweet id", () => {
const match = twitter.pattern.exec(
const match = twitter.regex.exec(
"https://x.com/elonmusk/status/1234567890"
);
expect(match?.pathname.groups.tweet_id).toBe("1234567890");
expect(match?.groups?.tweet_id).toBe("1234567890");
});
});
Loading
Loading