Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -841,6 +841,41 @@ do not provide sufficiently complete / non-buggy `TextDecoder` APIs.
> but they are fixing them and the expected update window is short.\
> If you want to circumvent browser bugs, use full `@exodus/bytes/encoding.js` import.

### `@exodus/bytes/whatwg.js`

WHATWG helpers

```js
import '@exodus/bytes/encoding.js' // For full legacy multi-byte encodings support
import { percentEncodeAfterEncoding } from '@exodus/bytes/whatwg.js'
```

#### `percentEncodeAfterEncoding(encoding, input, percentEncodeSet, spaceAsPlus = false)`

Implements [percent-encode after encoding](https://url.spec.whatwg.org/#string-percent-encode-after-encoding)
per WHATWG URL specification.

> [!IMPORTANT]
> You must import `@exodus/bytes/encoding.js` for this API to accept legacy multi-byte encodings.

Encodings `utf-16le`, `utf-16be`, and `replacement` are not accepted.

[C0 control percent-encode set](https://url.spec.whatwg.org/#c0-control-percent-encode-set) is
always percent-encoded.

`percentEncodeSet` is an addition to that, and must be a string of unique increasing codepoints
in range 0x20 - 0x7e, e.g. `' "#<>'`.

This method accepts [DOMStrings](https://webidl.spec.whatwg.org/#idl-DOMString) and converts them
to [USVStrings](https://webidl.spec.whatwg.org/#idl-USVString).
This is different from e.g. `encodeURI` and `encodeURIComponent` which throw on lone surrogates:
```js
> percentEncodeAfterEncoding('utf8', '\ud800', ' "#$%&+,/:;<=>?@[\\]^`{|}') // component
'%EF%BF%BD'
> encodeURIComponent('\ud800')
Uncaught URIError: URI malformed
```

## Changelog

See [GitHub Releases](https://github.com/ExodusOSS/bytes/releases) tab
Expand Down
31 changes: 31 additions & 0 deletions fallback/percent.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import { decodeAscii, encodeLatin1 } from './latin1.js'
import { decode2string } from './_utils.js'

const ERR = 'percentEncodeSet must be a string of unique increasing codepoints in range 0x20 - 0x7e'

// Encoders built so far, keyed by the percent-encode set followed by '0' / '1' for spaceAsPlus
const percentMap = new Map()

// Lookup tables shared by every encoder, filled in on first use:
// hex[i] is the '%XX' escape of byte i, base[i] is the default output for byte i
let hex
let base

/**
 * Returns a function that percent-encodes bytes, building it once per distinct
 * (set, spaceAsPlus) combination and reusing it afterwards.
 *
 * Bytes below 0x20 and above 0x7e are always escaped; `set` lists the additional
 * characters to escape.
 *
 * @param {string} set - Extra characters to escape: unique, in increasing order, within 0x20 - 0x7e
 * @param {boolean} [spaceAsPlus=false] - Emit '+' for 0x20, regardless of whether `set` contains a space
 * @returns {(u8: Uint8Array, start?: number, end?: number) => string} Forwards its arguments and the
 *   lookup table to `decode2string` (presumably mapping each byte in [start, end) through the table -
 *   confirm against ./_utils.js). Its input is not typechecked, it is for internal use only.
 * @throws {TypeError} If `set` is not a string matching the rules above, or `spaceAsPlus` is not a boolean
 */
export function percentEncoder(set, spaceAsPlus = false) {
  const isPrintableAscii = typeof set === 'string' && !/[^\x20-\x7E]/.test(set)
  if (!isPrintableAscii) throw new TypeError(ERR)
  if (typeof spaceAsPlus !== 'boolean') throw new TypeError('spaceAsPlus must be boolean')

  const cacheKey = `${set}${Number(spaceAsPlus)}`
  const existing = percentMap.get(cacheKey)
  if (existing) return existing

  // The string is known to be ascii at this point. Sorting must be a no-op and must not
  // reveal duplicates, otherwise the set was not unique and increasing.
  const codes = encodeLatin1(set).sort()
  const isUniqueIncreasing = decodeAscii(codes) === set && new Set(codes).size === codes.length
  if (!isUniqueIncreasing) throw new TypeError(ERR)

  if (!base) {
    const escapes = []
    for (let byte = 0; byte < 256; byte++) {
      escapes.push(`%${byte.toString(16).padStart(2, '0').toUpperCase()}`)
    }

    hex = escapes
    base = escapes.map((escaped, byte) =>
      byte >= 0x20 && byte <= 0x7e ? String.fromCharCode(byte) : escaped
    )
  }

  const table = [...base] // per-encoder copy, the shared table stays untouched
  for (const code of codes) table[code] = hex[code]
  if (spaceAsPlus) table[0x20] = '+' // wins over whatever percentEncodeSet says about the space

  // Input is not typechecked, for internal use only
  const percentEncode = (u8, start = 0, end = u8.length) => decode2string(u8, start, end, table)
  percentMap.set(cacheKey, percentEncode)
  return percentEncode
}
9 changes: 8 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
"test:spidermonkey": "exodus-test --engine=spidermonkey:bundle",
"test:hermes": "exodus-test --engine=hermes:bundle",
"test:quickjs": "exodus-test --engine=quickjs:bundle",
"test:xs": "exodus-test --engine=xs:bundle",
"test:xs": "EXODUS_TEST_IGNORE='tests/whatwg.browser.test.js' exodus-test --engine=xs:bundle",
"test:engine262": "exodus-test --engine=engine262:bundle",
"test:deno": "exodus-test --engine=deno:pure",
"test:bun": "exodus-test --engine=bun:pure",
Expand Down Expand Up @@ -71,6 +71,7 @@
"/fallback/encoding.util.js",
"/fallback/hex.js",
"/fallback/latin1.js",
"/fallback/percent.js",
"/fallback/multi-byte.encodings.cjs",
"/fallback/multi-byte.encodings.json",
"/fallback/multi-byte.js",
Expand Down Expand Up @@ -120,6 +121,8 @@
"/utf8.js",
"/utf8.d.ts",
"/utf8.node.js",
"/whatwg.js",
"/whatwg.d.ts",
"/wif.js",
"/wif.d.ts"
],
Expand Down Expand Up @@ -200,6 +203,10 @@
"node": "./utf8.node.js",
"default": "./utf8.js"
},
"./whatwg.js": {
"types": "./whatwg.d.ts",
"default": "./whatwg.js"
},
"./wif.js": {
"types": "./wif.d.ts",
"default": "./wif.js"
Expand Down
146 changes: 146 additions & 0 deletions tests/whatwg.browser.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
import '@exodus/bytes/encoding.js'
import { percentEncodeAfterEncoding } from '@exodus/bytes/whatwg.js'
import { keccakprg } from '@noble/hashes/sha3-addons.js'
import { describe, test, before, after } from 'node:test'
import { labels } from './encoding/fixtures/encodings.cjs'

// Special-query percent-encode set: https://url.spec.whatwg.org/#special-query-percent-encode-set
// It is the one that applies here because the test goes through the query of an https:// URL
const specialquery = ' "#\'<>'

// Encodings excluded from the tests, see https://encoding.spec.whatwg.org/#get-an-encoder
const invalid = new Set(['replacement', 'utf-16le', 'utf-16be'])

// Either may be missing, in which case the browser comparison suite is skipped
const window = globalThis.window
const document = globalThis.document

// Array of `length` single-code-point strings for consecutive code points beginning at `start`
const range = (length, start) => {
  const toChar = (_, offset) => String.fromCodePoint(start + offset)
  return Array.from({ length }, toChar)
}
// Inputs that the suites below percent-encode for every tested encoding.
// The snapshot suite records one result per entry, in this order, so adding, removing or
// reordering entries changes the snapshots.
const strings = [
// Single code points 0x20..0x11f, without ' ' and '#'
...range(256, 0x20).filter((x) => x !== ' ' && x !== '#'), // we directly set to href
// Code points 0x00..0xff (without '#', tab, LF, CR), each followed by '*'
...range(256, 0)
.filter((x) => x !== '#' && x !== '\t' && x !== '\n' && x !== '\r')
.map((x) => `${x}*`),
// The same code points, each surrounded by '*'
...range(256, 0)
.filter((x) => x !== '#' && x !== '\t' && x !== '\n' && x !== '\r')
.map((x) => `*${x}*`),

// Individual code points near the end of the BMP and beyond it
String.fromCodePoint(0xfe_ff),
String.fromCodePoint(0xff_fd),
String.fromCodePoint(0xff_fe),
String.fromCodePoint(0xff_ff),
String.fromCodePoint(0x1_00_00),
String.fromCodePoint(0x2_f8_a6), // max big5
String.fromCodePoint(0x2_f8_a7),
String.fromCodePoint(0x1_10_00),

// Code points above the BMP mixed with '*' (42) and ',' (44)
// NOTE(review): the last two entries repeat the first two - confirm whether that is intentional
String.fromCodePoint(42, 0x1_00_00, 0x1_10_00, 42),
String.fromCodePoint(42, 0x1_00_00, 44, 0x1_10_00, 42),
String.fromCodePoint(42, 0x1_00_00, 0x1_10_00, 42),
String.fromCodePoint(42, 0x1_00_00, 44, 0x1_10_00, 42),

// Groups of ASCII punctuation characters
String.fromCharCode(0x20, 0x22, 0x3c, 0x3e, 0x60),
String.fromCharCode(0x20, 0x22, 0x24, 0x3c, 0x3e),
String.fromCharCode(0x3f, 0x5e, 0x60, 0x7b, 0x7d),
String.fromCharCode(0x2f, 0x3a, 0x3b, 0x3d, 0x40, 0x5b, 0x5c, 0x5d, 0x7c),
String.fromCharCode(0x24, 0x25, 0x26, 0x2b, 0x2c),
String.fromCharCode(0x21, 0x27, 0x28, 0x29, 0x7e),

// Unpaired surrogates: inside ASCII text, doubled, and as a low surrogate followed by a high one
String.fromCharCode(0x61, 0x62, 0xd8_00, 0x77, 0x78),
String.fromCharCode(0xd8_00, 0xd8_00),
String.fromCharCode(0x61, 0x62, 0xdf_ff, 0x77, 0x78),
String.fromCharCode(0xdf_ff, 0xd8_00),

// Long runs of consecutive code points joined into a single string
range(0x2_00, 0x24).join(''), // from # + 1
range(0x2_00, 0xf6_00).join(''), // user-defined
range(0x2_00, 0xff_00).join(''),
range(0x20_00, 0x24).join(''),
range(0x20_00, 0xf0_00).join(''),
range(0x20_00, 0xf_f0_00).join(''),
'hello' + range(0x20_00, 0xf0_00).join('') + 'abc',
]

// No entropy is ever added to this PRG, so it produces the same predictable output on every run
const fixedPRG = keccakprg()

// 32 rounds, each reading 1024 bytes three ways: as 8-bit units, as 16-bit units,
// and as 32-bit values reduced modulo 0x110000 to get code points
for (let round = 0; round < 32; round++) {
  const bytes = fixedPRG.randomBytes(1024)
  const { buffer, byteOffset, byteLength } = bytes
  const units = new Uint16Array(buffer, byteOffset, byteLength / 2)
  const words = new Uint32Array(buffer, byteOffset, byteLength / 4)
  const raw = [
    String.fromCharCode(...bytes),
    String.fromCharCode(...units),
    String.fromCodePoint(...words.map((x) => x % 0x11_00_00)),
  ]

  // Drop surrounding whitespace, every tab / newline / '#', and trailing C0 controls or spaces
  // (presumably because these would not survive being assigned to href as a query - confirm)
  for (const text of raw) {
    const stripped = text.trim().replaceAll(/[\t\n\r#]/g, '')
    strings.push(stripped.replaceAll(/[\x00-\x20]+$/g, '')) // eslint-disable-line no-control-regex
  }
}

// The browser comparison needs a DOM. It passes on Chromium and Servo.
// Webkit gives incorrect results, and Firefox fails on CI only, for reasons not yet understood.
const skip =
  !(document && window) || ['webkit', 'firefox'].includes(process.env.EXODUS_TEST_PLATFORM)

// Cross-checks percentEncodeAfterEncoding against the URL serialization of the host browser,
// with one test per encoding label. Skipped as a whole when `skip` is truthy.
describe('percent-encode after encoding matches browser', { skip }, () => {
// Resolver of the promise currently waiting for the next message; replaced before each wait
let handle
const onmessage = (event) => handle(event.data)
const iframe = document.createElement('iframe')

before(() => {
window.addEventListener('message', onmessage)
document.body.append(iframe)
})

after(() => {
window.removeEventListener('message', onmessage)
iframe.remove()
})

for (const encoding of labels) {
if (invalid.has(encoding)) continue
test(encoding, async (t) => {
// Number of strings compared so far, used in assertion messages and the final count check
let ok = 0
// Resolved by the empty message the iframe script posts once it has started
const loaded = new Promise((resolve) => (handle = resolve))
// Document for the iframe: it puts every received string into the query of an https:// URL
// through an <a> element, and posts back the resulting `search` without the leading '?'
const html = `
<!DOCTYPE html>
<script>
var a = document.createElement('a');
window.parent.postMessage('', '*');
window.addEventListener('message', (e) => {
a.href = 'https://example.com/?' + e.data
window.parent.postMessage(a.search.slice(1), '*')
})
</script>`
// Loaded as a data: URL whose charset is the encoding under test
iframe.src = `data:text/html;charset=${encoding},${encodeURI(html)}`
await loaded

for (const str of strings) {
// `handle` has to be replaced before posting, so that the reply resolves this promise
const promise = new Promise((resolve) => (handle = resolve))
iframe.contentWindow.postMessage(str, '*')
const actual = percentEncodeAfterEncoding(encoding, str, specialquery)
t.assert.strictEqual(actual, await promise, `${encoding} #${ok + 1}`)
ok++
}

t.assert.strictEqual(ok, strings.length)
})
}
})

// Ensures that behavior matches on every platform by comparing against snapshots.
// Together with the browser comparison in this file, this ties the snapshots to the
// reference browser platforms.
describe('percent-encode after encoding matches snapshot', () => {
  for (const encoding of labels) {
    if (!invalid.has(encoding)) {
      test(encoding, async (t) => {
        // Encode everything first, so that encoding errors surface even without snapshot support
        const res = strings.map((str) => percentEncodeAfterEncoding(encoding, str, specialquery))
        if (!t.assert.snapshot) {
          t.skip('Snapshots are not supported')
          return
        }

        t.assert.snapshot(res)
      })
    }
  }
})
Loading