Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 22 additions & 0 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
"react-dom": "^15.7.0",
"redis": "^0.10.3",
"request": "^2.88.2",
"request-filtering-agent": "^3.2.0",
"requirejs": "2.1.14",
"s-expression": "~2.2.0",
"script-loader": "^0.7.2",
Expand Down
115 changes: 99 additions & 16 deletions src/server.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,18 @@ const { drive } = require("googleapis/build/src/apis/drive/index.js");

var BACKREF_KEY = "originalProgram";

// Limits for the streaming proxy. /downloadImg gets larger/looser caps because
// images can legitimately be tens of MB; also we've seen e.g. Drive ?export=
// take a while to get going. SHAREURL is intended to always be program
// plaintext.
// NOTE(joe + claude): really the timeout maybe should be on idleness at
// startup/between bytes, not overall per completed request, but that's work to
// plumb into `request`
// Unit helpers so the limits below read as "<count> * <unit>".
var PROXY_BYTES_PER_MB = 1024 * 1024;
var PROXY_MS_PER_SECOND = 1000;

// /downloadImg: images may be large and slow to start.
var IMAGE_PROXY_MAX_BYTES = 20 * PROXY_BYTES_PER_MB; // 20 MB
var IMAGE_PROXY_TIMEOUT_MS = 30 * PROXY_MS_PER_SECOND; // 30 s
// /load-shareurl: program plaintext only, so much tighter caps.
var SHAREURL_PROXY_MAX_BYTES = 1 * PROXY_BYTES_PER_MB; // 1 MB
var SHAREURL_PROXY_TIMEOUT_MS = 10 * PROXY_MS_PER_SECOND; // 10 s

function start(config, onServerReady) {
var defaultOpts = {
PYRET: process.env.PYRET,
Expand All @@ -27,6 +39,7 @@ function start(config, onServerReady) {
var csrf = require('csurf');
var googleAuth = require('./google-auth.js');
var request = require('request');
var requestFilteringAgent = require('request-filtering-agent');
var mustache = require('mustache-express');
var url = require('url');
var fs = require('fs');
Expand Down Expand Up @@ -186,24 +199,74 @@ function start(config, onServerReady) {
});
}

app.get("/downloadImg", function(req, response) {
var parsed = url.parse(req.url);
var googleLink = decodeURIComponent(parsed.query.slice(0));
var googleParsed = url.parse(googleLink);
var gReq = request({url: googleLink, encoding: 'binary'}, function(error, imgResponse, body) {
if(error) {
response.status(400).send({type: "image-load-failure", error: "Unable to load image " + String(error)});
/**
 * Fetch `opts.url` server-side and stream the body back on `opts.res`,
 * enforcing protocol, host, content-type, size and time limits.
 *
 * Always sets `X-Content-Type-Options: nosniff` and
 * `Content-Security-Policy: sandbox` on the client response so proxied bytes
 * cannot be sniffed into, or run as, active content on our origin.
 *
 * @param {Object} opts
 * @param {Object} opts.res - Express response to write to.
 * @param {string} opts.url - Absolute http(s) URL to fetch.
 * @param {?function(string): boolean} opts.allowedHosts - Hostname predicate
 *   applied to the initial URL and to every redirect target; null/undefined
 *   means any host is accepted.
 * @param {number} opts.maxBytes - Maximum number of body bytes to relay.
 * @param {number} opts.timeoutMs - Passed to `request` as its `timeout`.
 * @param {?function(string=): boolean} opts.contentTypeOk - Predicate on the
 *   upstream content-type header; null/undefined disables the check.
 * @param {function(Object, Error)} opts.onError - Called with (res, err) when
 *   the upstream request errors before anything was sent to the client.
 */
function proxyStreamFetch(opts) {
  var res = opts.res;
  res.set('X-Content-Type-Options', 'nosniff');
  res.set('Content-Security-Policy', 'sandbox');

  var parsed;
  try {
    // Query parsers can hand us arrays/objects; only a plain string is a URL.
    if (typeof opts.url !== 'string') throw new TypeError('url must be a string');
    parsed = new URL(opts.url);
  } catch (e) {
    return res.status(400).send({ error: 'invalid-url' });
  }
  // Only http(s) can be proxied; reject anything else before it reaches
  // `request` or the agent selection.
  if (parsed.protocol !== 'http:' && parsed.protocol !== 'https:') {
    return res.status(400).send({ error: 'protocol-not-allowed' });
  }
  if (opts.allowedHosts && !opts.allowedHosts(parsed.hostname)) {
    return res.status(400).send({ error: 'host-not-allowed' });
  }

  // Fetch the normalized form of the URL we just validated, so the host check
  // and the actual request cannot disagree about how the string parses.
  var targetUrl = parsed.href;
  var bytes = 0;
  // True once this function has finished (or given up on) the client
  // response; late upstream events must then be ignored.
  var settled = false;

  // NOTE(review): the filtering agent is chosen for the initial URL's
  // protocol. Confirm how `request` treats this agent when a redirect switches
  // between http and https, so the address filter still covers that hop.
  var upstream = request({
    url: targetUrl,
    timeout: opts.timeoutMs,
    agent: requestFilteringAgent.useAgent(targetUrl),
    followRedirect: function(resp) {
      if (!opts.allowedHosts) return true;
      try {
        var next = new URL(resp.headers.location, targetUrl);
        return opts.allowedHosts(next.hostname);
      } catch (_) { return false; }
    },
  });

  // Stop the upstream transfer and answer the client with a JSON error.
  function abortWith(status, body) {
    settled = true;
    upstream.destroy();
    res.status(status).send(body);
  }

  // If the client disconnects (e.g. the browser aborts /load-shareurl after
  // direct succeeded), tear down the upstream connection too — otherwise
  // we'd keep streaming bytes from raw.githubusercontent.com to nowhere.
  res.on('close', function() { upstream.destroy(); });

  upstream.on('error', function(err) {
    if (settled) return;
    settled = true;
    if (!res.headersSent) opts.onError(res, err);
    // Mid-stream failure: the status line is already out, so the only honest
    // signal left is to drop the connection instead of leaving it hanging.
    else res.destroy();
  });

  upstream.on('response', function(upRes) {
    var contentType = upRes.headers['content-type'];
    if (opts.contentTypeOk && !opts.contentTypeOk(contentType)) {
      return abortWith(400, { error: 'content-type-not-allowed', detail: contentType });
    }
    // Reject oversize bodies up front when the upstream declares a length, so
    // the client gets a clean 502 rather than a truncated stream. A missing
    // header yields NaN, which never compares greater.
    var declaredLength = Number(upRes.headers['content-length']);
    if (declaredLength > opts.maxBytes) {
      return abortWith(502, { error: 'too-large' });
    }

    res.status(upRes.statusCode);
    if (contentType) {
      res.set('content-type', contentType);
    }

    // Relay chunks by hand rather than upstream.pipe(res): the request
    // library's .pipe copies upstream headers verbatim, which would overwrite
    // the security headers set above. Writing here also guarantees the chunk
    // that crosses the limit is never forwarded.
    upRes.on('data', function(chunk) {
      if (settled) return;
      bytes += chunk.length;
      if (bytes > opts.maxBytes) {
        if (!res.headersSent) return abortWith(502, { error: 'too-large' });
        settled = true;
        upstream.destroy();
        res.destroy();
        return;
      }
      // Respect client backpressure, as pipe() would.
      if (!res.write(chunk)) {
        upRes.pause();
        res.once('drain', function() { upRes.resume(); });
      }
    });
    upRes.on('end', function() {
      if (settled) return;
      settled = true;
      res.end();
    });
  });
}

// GET /downloadImg?<percent-encoded image URL>
// Proxies an image from an arbitrary host through proxyStreamFetch. The whole
// raw query string (not a named parameter) is the encoded target URL.
app.get("/downloadImg", function(req, response) {
  var rawQuery = url.parse(req.url).query;
  var googleLink = null;
  try {
    // `query` is null when the request has no "?..." part, and
    // decodeURIComponent throws URIError on a malformed percent-escape;
    // both used to surface as an unhandled 500.
    if (rawQuery) googleLink = decodeURIComponent(rawQuery);
  } catch (e) {
    googleLink = null;
  }
  if (!googleLink) {
    return response.status(400).send({
      type: 'image-load-failure',
      error: 'Unable to load image: missing or malformed URL',
    });
  }
  proxyStreamFetch({
    res: response,
    url: googleLink,
    allowedHosts: null,
    maxBytes: IMAGE_PROXY_MAX_BYTES,
    timeoutMs: IMAGE_PROXY_TIMEOUT_MS,
    // Media types are case-insensitive, so compare in lower case.
    contentTypeOk: function(ct) {
      return typeof ct === 'string' && ct.toLowerCase().indexOf('image/') === 0;
    },
    onError: function(res, err) {
      res.status(400).send({ type: 'image-load-failure', error: 'Unable to load image ' + String(err) });
    },
  });
});

Expand Down Expand Up @@ -565,6 +628,26 @@ function start(config, onServerReady) {

});

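// Server-side proxy for #shareurl loads from hosts that some school networks
// block or will likely block (notably raw.githubusercontent.com).
// The browser side (SHAREURL_PROXY_HOSTS in src/web/js/beforePyret.js) races a
// direct fetch against this proxy on the first request to a listed host, then
// sticks with whichever path worked. Keep that list in sync with
// SHAREURL_ALLOWED_HOSTS; both can be expanded as needed.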
var SHAREURL_ALLOWED_HOSTS = new Set(['raw.githubusercontent.com']);

// Hostname predicate handed to proxyStreamFetch for /load-shareurl.
function isShareurlHostAllowed(hostname) {
  return SHAREURL_ALLOWED_HOSTS.has(hostname);
}

// GET /load-shareurl?url=<target>: relay program text from an allowlisted
// host, with the tighter SHAREURL size/time limits and no content-type check.
app.get("/load-shareurl", function(req, res) {
  var proxyOptions = {
    res: res,
    url: req.query.url,
    allowedHosts: isShareurlHostAllowed,
    maxBytes: SHAREURL_PROXY_MAX_BYTES,
    timeoutMs: SHAREURL_PROXY_TIMEOUT_MS,
    contentTypeOk: null,
    // Upstream failure details are deliberately not echoed to the client.
    onError: function(clientRes, _upstreamErr) {
      clientRes.status(502).send({ error: 'upstream-error' });
    },
  };
  proxyStreamFetch(proxyOptions);
});


app.post("/share-image", function(req, res) {
var driveFileId = req.body.fileId;
Expand Down
124 changes: 124 additions & 0 deletions src/web/js/beforePyret.js
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,130 @@
var originalPageLoad = Date.now();
console.log("originalPageLoad: ", originalPageLoad);

// Transparently route browser fetches to allowlisted hosts through the
// server-side proxy at /load-shareurl, but only when the direct path doesn't
// work.
//
// Strategy: the FIRST fetch to an allowlisted host fires direct + proxied in
// parallel. We decide shouldProxy for the rest of the page-load from direct's
// response *headers*:
// - direct returned 2xx with content-type text/plain -> shouldProxy=false:
// serve direct's response, abort the in-flight proxy fetch.
// - direct failed, hung past timeout, or returned anything else
// -> shouldProxy=true:
// serve proxy's response.
// A key idea is that network-blocky things sometimes return 200 with a
// message page about blocking (or an error, but that counts as a fail). We
// don't want to accidentally think that's a success.
// shouldProxy state is in-memory and per-host — never persisted, since
// reachability changes between networks and a stale value would silently
// break loads.
//
// Installed on the global fetch as early as possible so it catches every fetch
// caller; some of them are in the pyret-lang runtime and would be otherwise
// difficult to configure.
// Hostnames whose fetches may be rerouted through /load-shareurl; fetches to
// any other host go straight to the original fetch.
const SHAREURL_PROXY_HOSTS = new Set(['raw.githubusercontent.com']);
// How long the first direct fetch gets to verify before the host is marked
// as needing the proxy.
const SHAREURL_DIRECT_TIMEOUT_MS = 5000;
// The browser's fetch, captured before window.fetch is replaced below, so the
// wrapper can still issue real requests.
const _origFetch = window.fetch.bind(window);

// Decided per-host proxy choice, and the pending decision while the first
// fetch to a host is still racing. Both are in-memory only.
const _shareurlShouldProxy = new Map(); // host -> boolean
const _shareurlShouldProxyInflight = new Map(); // host -> Promise<boolean>

/**
 * Build the same-origin proxy URL that fetches `fetchInput` server-side.
 * @param {string|Request|URL} fetchInput - First argument given to fetch().
 * @returns {string} `/load-shareurl?url=<percent-encoded target>`.
 */
function _shareurlProxyUrl(fetchInput) {
  const targetUrl = _shareurlInputToUrl(fetchInput);
  return `/load-shareurl?url=${encodeURIComponent(targetUrl)}`;
}

/**
 * Reduce a fetch() input to its URL string.
 * @param {*} fetchInput - A string, a Request, or anything stringifiable
 *   (e.g. a URL object).
 * @returns {string} The string itself, the Request's `url`, or
 *   `String(fetchInput)` for everything else.
 */
function _shareurlInputToUrl(fetchInput) {
  if (typeof fetchInput === 'string') {
    return fetchInput;
  }
  // Guard the constructor lookup so environments without Request still work.
  const hasRequestCtor = typeof Request !== 'undefined';
  if (hasRequestCtor && fetchInput instanceof Request) {
    return fetchInput.url;
  }
  return String(fetchInput);
}

/**
 * Decide whether a direct (un-proxied) response looks like the real upstream.
 * @param {Response} r - Response from the direct fetch.
 * @returns {boolean} True only for an ok response whose content-type starts
 *   with text/plain (case-insensitive).
 */
function _shareurlVerifyDirect(r) {
  if (r.ok) {
    // Source files on raw.githubusercontent.com are served as text/plain
    // (.arr, .json, .csv, .md alike). HTML block pages, captive portals and
    // other unexpected types are not trusted as genuine upstream replies.
    const rawType = r.headers.get('content-type') || '';
    return rawType.toLowerCase().startsWith('text/plain');
  }
  return false;
}

/**
 * Issue one request, through the proxy or directly, via the original fetch.
 * @param {boolean} shouldProxy - Whether to route through /load-shareurl.
 * @param {string|Request|URL} fetchInput - Caller's fetch() input.
 * @param {Object=} fetchInit - Caller's fetch() init, passed through as-is.
 * @returns {Promise<Response>} Whatever the original fetch returns.
 */
function _shareurlFetch(shouldProxy, fetchInput, fetchInit) {
  if (shouldProxy) {
    return _origFetch(_shareurlProxyUrl(fetchInput), fetchInit);
  }
  return _origFetch(fetchInput, fetchInit);
}

/**
 * Run the first fetch to an allowlisted host as a race: one direct request
 * and one request through the /load-shareurl proxy, started together.
 *
 * @param {string|Request|URL} fetchInput - Caller's fetch() input.
 * @param {Object=} fetchInit - Caller's fetch() init. Passed unchanged to the
 *   direct request; copied with `signal` replaced for the proxied request.
 * @returns {{responsePromise: Promise<Response>, shouldProxyPromise: Promise<boolean>}}
 *   responsePromise settles with the first of (verified direct, proxy) to
 *   fulfill, or rejects if both fail. shouldProxyPromise resolves (it has no
 *   rejecting branch) to false when direct verified before
 *   SHAREURL_DIRECT_TIMEOUT_MS, and to true otherwise.
 */
function _shareurlRace(fetchInput, fetchInit) {
const proxyCtrl = new AbortController();
// NOTE(joe): The signal overwrite is technically not the right fetch()
// polyfill. If the caller elsewhere in the codebase provided a different
// signal (which in the fetch API is only for aborting as of April '26), that
// caller aborting through that signal won't cancel the proxy fetch.
// I'm OK letting that case slip through here in exchange for not having a
// bunch of extra event handler forwarding
const proxyP = _origFetch(_shareurlProxyUrl(fetchInput),
Object.assign({}, fetchInit, { signal: proxyCtrl.signal }));
// A direct response that fails verification is turned into a rejection, so
// everything below can treat "direct fulfilled" as "direct verified".
const directP = _origFetch(fetchInput, fetchInit).then(r => {
if (!_shareurlVerifyDirect(r)) throw new Error('direct request failed');
return r;
});

// shouldProxy: false iff direct verified before the timeout, else true.
// Whether to proxy is decided solely on whether direct succeeds or not
const shouldProxyPromise = Promise.race([
directP.then(() => false, () => true),
new Promise(resolve => setTimeout(() => resolve(true), SHAREURL_DIRECT_TIMEOUT_MS)),
]);

// Settlement-order check: if direct verifies before proxy returns, abort
// the in-flight proxy to stop wasting server bandwidth. We must NOT
// abort once proxy has already returned, since by then the caller is
// reading proxy's body and aborting would error its stream mid-read.
const directFinishedSuccessfullyAndFirstP = Promise.race([
directP.then(() => true, () => false),
proxyP.then(() => false, () => false),
]);
directFinishedSuccessfullyAndFirstP.then(directFirst => {
if (directFirst) proxyCtrl.abort();
});

// Caller's response: whichever of direct-verified or proxy fulfills
// first. If both fail, surface proxy's error (the more authoritative
// upstream — direct's may just be 'direct-not-verified').
// errors[] follows the input order [directP, proxyP], so index 1 is proxy's.
const responsePromise = Promise.any([directP, proxyP]).catch(
aggErr => Promise.reject(aggErr.errors[1] || aggErr.errors[0])
);

return { responsePromise, shouldProxyPromise };
}

// Replacement for the global fetch. Requests to hosts outside
// SHAREURL_PROXY_HOSTS (or with an unparseable URL) pass straight through.
// For listed hosts: reuse the per-host decision if one exists, wait for it if
// it is still being made, otherwise run the direct-vs-proxy race and record
// its outcome for later fetches.
window.fetch = function(fetchInput, fetchInit) {
  let targetHost;
  try {
    const resolved = new URL(_shareurlInputToUrl(fetchInput), window.location.href);
    targetHost = resolved.hostname;
  } catch (_) {
    return _origFetch(fetchInput, fetchInit);
  }
  if (!SHAREURL_PROXY_HOSTS.has(targetHost)) {
    return _origFetch(fetchInput, fetchInit);
  }

  // Already decided for this host during this page-load.
  const decided = _shareurlShouldProxy.get(targetHost);
  if (decided !== undefined) {
    return _shareurlFetch(decided, fetchInput, fetchInit);
  }

  // Decision pending: queue behind it and issue one fresh request once the
  // first fetch's race has settled the question.
  const pending = _shareurlShouldProxyInflight.get(targetHost);
  if (pending) {
    return pending.then(function(useProxy) {
      return _shareurlFetch(useProxy, fetchInput, fetchInit);
    });
  }

  // First fetch to this host this page-load: run the race.
  const race = _shareurlRace(fetchInput, fetchInit);
  _shareurlShouldProxyInflight.set(targetHost, race.shouldProxyPromise);
  race.shouldProxyPromise.then(function(useProxy) {
    _shareurlShouldProxy.set(targetHost, useProxy);
    _shareurlShouldProxyInflight.delete(targetHost);
  });
  return race.responsePromise;
};

const isEmbedded = window.parent !== window;

var shareAPI = makeShareAPI(process.env.CURRENT_PYRET_RELEASE);
Expand Down
Loading