Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: detect cids in query parameters #31

Merged
merged 6 commits into from
Jan 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 7 additions & 7 deletions src/sw/controller.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import { v4 as uuidv4 } from 'uuid'
import * as Sentry from '@sentry/browser'

import { Interceptor } from './interceptor.js'
import { findCIDInURL } from '../utils.js'
import { findCIDPathInURL } from '../utils.js'

const FILTERED_HOSTS = [
'images.studio.metaplex.com',
Expand Down Expand Up @@ -48,11 +48,11 @@ export class Controller {
}

const { url } = event.request
const cid = findCIDInURL(url)
const cidPath = findCIDPathInURL(url)

if (cid) {
debug('cid', cid, url)
event.respondWith(fetchCID(cid, this.saturn, this.clientId, event))
if (cidPath) {
debug('cidPath', cidPath, url)
event.respondWith(fetchCID(cidPath, this.saturn, this.clientId, event))
}
})
}
Expand All @@ -76,12 +76,12 @@ function getClientKey() {
return clientKey
}

async function fetchCID (cid, saturn, clientId, event) {
async function fetchCID(cidPath, saturn, clientId, event) {
let response = null
const { request } = event

try {
const interceptor = new Interceptor(cid, saturn, clientId, event)
const interceptor = new Interceptor(cidPath, saturn, clientId, event)
response = await interceptor.fetch()
} catch (err) {
debug(`${request.url}: fetchCID err %O`, err)
Expand Down
7 changes: 2 additions & 5 deletions src/sw/interceptor.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,18 +2,15 @@ import toIterable from 'browser-readablestream-to-it'
import createDebug from 'debug'
import * as Sentry from '@sentry/browser'

import { getCidPathFromURL } from '../utils.js'

const debug = createDebug('sw')
const cl = console.log

export class Interceptor {
static nocache = false // request/response skips L1 cache entirely
static bypasscache = false // request skips L1 cache, response gets cached.

constructor(cid, saturn, clientId, event) {
this.cid = cid
this.cidPath = getCidPathFromURL(event.request.url, cid)
constructor(cidPath, saturn, clientId, event) {
this.cidPath = cidPath
this.saturn = saturn
this.clientId = clientId
this.event = event
Expand Down
74 changes: 55 additions & 19 deletions src/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -52,31 +52,67 @@ export class Deferred {
}

// Modified from https://github.com/PinataCloud/ipfs-gateway-tools/blob/34533f3d5f3c0dd616327e2e5443072c27ea569d/src/index.js#L6
export function findCIDInURL (url) {
const splitUrl = url.split('?')[0].split('/')
for (const split of splitUrl) {
if (isIPFS.cid(split)) {
return split
}
const splitOnDot = split.split('.')[0]
if(isIPFS.cid(splitOnDot)) {
return splitOnDot
/**
 * Locates an IPFS CID, together with any file path that follows it, in a URL.
 *
 * Candidate strings are examined in order: the hostname joined with the
 * pathname first, then every query parameter value. The first candidate that
 * yields a CID which also occurs in the full href wins.
 *
 * @param {string} url - URL to inspect; it is handed to `new URL()`.
 * @returns {string|null} `<cid>/<path>` when a path follows the CID, the bare
 *   CID when there is no path, or null when the URL cannot be parsed or no
 *   candidate contains a CID.
 */
export function findCIDPathInURL(url) {
let urlObj
// new URL() throws on input it cannot parse; that case is reported as "no CID".
try {
urlObj = new URL(url)
} catch (err) {
return null
}

let cid = null
let path = null

const { hostname, pathname, searchParams, href } = urlObj

// Order matters: the loop below stops at the first candidate that matches.
const searchStrings = [
hostname + pathname, // checks for path based or subdomain based cids.
...searchParams.values(), // params could contain cid URLs, e.g. ?url=ipfs.io/ipfs/<cid>
]

for (const str of searchStrings) {
const result = findCIDPathInUrlComponent(str)

// sanity check if parsed cid appears in URL
if (result.cid && href.includes(result.cid)) {
// Parenthesized so the destructuring assignment is not parsed as a block statement.
({ cid, path } = result)
break
}
}

// NOTE(review): the bare `return null` below looks like the removed side of
// the diff (tail of the old findCIDInURL); the lines after it are its
// replacement. Confirm against the merged file.
return null
// A falsy path yields the CID alone; if nothing matched, cid is still null.
const cidPath = path ? `${cid}/${path}` : cid

return cidPath
}

export function getCidPathFromURL(url, cid) {
const { hostname, pathname } = new URL(url)
let cidPath
/**
 * Scans one "/"-separated string for a CID and the path that follows it.
 *
 * Each segment is tested as a whole first, then by the text before its first
 * ".", which covers forms such as `<cid>.ipfs.dweb.link`.
 *
 * NOTE(review): `isIPFS` is not declared in the visible hunk; presumably it is
 * imported at the top of utils.js. Confirm.
 *
 * @param {string} str - Host + path, or a query parameter value.
 * @returns {{cid: (string|null), path: (string|null)}} Both fields are null
 *   when no segment matches; `path` is null when nothing follows the CID.
 */
function findCIDPathInUrlComponent(str) {
let cid = null
let path = null

// Drops the first "http://" or "https://" found (the regex is unanchored and
// has no g flag), then splits the remainder on "/".
const splitStr = str.replace(/https?:\/\//, '').split('/')
// Heuristic to check if the first segment is a domain.
const isMaybeHost = splitStr[0].includes('.')

// Assumes the rest of the segments after the cid form the file path.
// `|| null` turns an empty join (no segments, or only a trailing slash) into null.
const segmentsToPath = i => splitStr.slice(i).join('/') || null

for (let i = 0; i < splitStr.length; i++) {
const segment = splitStr[i]
// Whole segment is a CID: everything after it is the path.
if (isIPFS.cid(segment)) {
cid = segment
path = segmentsToPath(i + 1)
break
}

// NOTE(review): the next five lines reference pathname, hostname and cidPath,
// none of which are declared in this function; they look like removed lines
// of the old getCidPathFromURL left in by the diff view. Confirm.
if (pathname.startsWith('/ipfs/')) {
cidPath = pathname.replace('/ipfs/', '')
} else if (hostname.includes(cid)) {
// https://<cid>.ipfs.dweb.link/cat.png -> https://saturn.ms/ipfs/<cid>/cat.png
cidPath = cid + pathname
// Otherwise test the text before the first "." of the segment.
const splitOnDot = segment.split('.')[0]
if(isIPFS.cid(splitOnDot)) {
cid = splitOnDot
// NOTE(review): isMaybeHost describes segment 0 only, but this branch can be
// reached with i > 0; segmentsToPath(1) would then include the matched
// segment itself. Confirm whether an `i === 0` check is intended.
if (isMaybeHost) {
path = segmentsToPath(1)
}
break
}
}

// NOTE(review): `return cidPath` looks like a removed line of the old
// function; the object return after it is the new result.
return cidPath
return { cid, path }
}
86 changes: 66 additions & 20 deletions test/utils.spec.js
Original file line number Diff line number Diff line change
@@ -1,41 +1,87 @@
import assert from 'node:assert/strict'
import { describe, it } from 'node:test'
import { findCIDInURL, getCidPathFromURL } from '#src/utils.js'
import { findCIDPathInURL } from '#src/utils.js'

// Tests for findCIDPathInURL: CIDs in the subdomain, in the URL path, and in
// query parameter values, plus the no-CID case.
// NOTE(review): the suite is labelled 'controller' but only exercises the
// helper imported from #src/utils.js.
// NOTE(review): this is a diff view, so some lines below are the removed side
// (calls to findCIDInURL / getCidPathFromURL, duplicated `it(` headers).
describe('controller', () => {
it('should find cid in the subdomain', () => {
it('finds the cid in the subdomain', () => {
const cid = 'bafybeigt4657qnz5bi2pa7tdsbiobny55hkpt5vupgnueex22tzvwxfiym'
const url = `https://${cid}.ipfs.dweb.link`

const foundCid = findCIDInURL(url)
assert.strictEqual(foundCid, cid)
assert.strictEqual(findCIDPathInURL(url), cid)
})

it('should find cid in the url path', () => {
it('finds the cidPath in the subdomain', () => {
const cid = 'bafybeidrf56yzbkocajbloyafrebrdzsam3uj35sce2fdyo4elb6zzoily'
const path = 'test/cat.png'
const cidPath = `${cid}/${path}`
const url = `https://${cid}.ipfs.dweb.link/${path}`

assert.strictEqual(findCIDPathInURL(url), cidPath)
})

it('finds the cid in the url path', () => {
const cid = 'QmS29VtmK7Ax6TMmMwbwqtuKSGRJTLJAmHMW83qGvBBxhV'
const url = `https://ipfs.io/ipfs/${cid}`

const foundCid = findCIDInURL(url)
assert.strictEqual(foundCid, cid)
assert.strictEqual(findCIDPathInURL(url), cid)
})

it('should find cidPath in the subdomain', () => {
const cid = 'bafybeigt4657qnz5bi2pa7tdsbiobny55hkpt5vupgnueex22tzvwxfiym'
const path = 'hello/world.png'
const cidPath = `${cid}/${path}`
const url = `https://${cid}.ipfs.dweb.link/${path}`
it('finds the cidPath in the url path', () => {
const cidPath = 'QmS29VtmK7Ax6TMmMwbwqtuKSGRJTLJAmHMW83qGvBBxhV/cat.png'
const url = `https://ipfs.io/ipfs/${cidPath}`

const foundCidPath = getCidPathFromURL(url, cid)
assert.strictEqual(foundCidPath, cidPath)
assert.strictEqual(findCIDPathInURL(url), cidPath)
})

it('should find cidPath in the url path', () => {
const cid = 'QmS29VtmK7Ax6TMmMwbwqtuKSGRJTLJAmHMW83qGvBBxhV'
const path = 'hello/world.png'
it('finds the cid in an encoded query param', () => {
const cid = 'bafybeidrf56yzbkocajbloyafrebrdzsam3uj35sce2fdyo4elb6zzoily'
// The parameter value ends in "/", yet the expected result is the bare CID.
const url = `https://proxy.com/?url=ipfs.io%2Fipfs%2F${cid}/`

assert.strictEqual(findCIDPathInURL(url), cid)
})

it('finds the cidPath in an encoded query param', () => {
const cidPath = 'bafybeidrf56yzbkocajbloyafrebrdzsam3uj35sce2fdyo4elb6zzoily/test/cat.png'
// The parameter value carries a percent-encoded "https://" scheme.
const url = `https://proxy.com/?url=https%3A%2F%2Fipfs.io%2Fipfs%2F${cidPath}`

assert.strictEqual(findCIDPathInURL(url), cidPath)
})

it('finds the subdomain cid in an encoded query param', () => {
const cid = 'bafybeidrf56yzbkocajbloyafrebrdzsam3uj35sce2fdyo4elb6zzoily'
const param = `${cid}.ipfs.dweb.link`
const url = `https://proxy.com/?url=${param}`

assert.strictEqual(findCIDPathInURL(url), cid)
})

it('finds the subdomain cidPath in an encoded query param', () => {
const cid = 'bafybeidrf56yzbkocajbloyafrebrdzsam3uj35sce2fdyo4elb6zzoily'
const path = 'dog/cow/cat.png'
const cidPath = `${cid}/${path}`
const url = `https://ipfs.io/ipfs/${cid}/${path}`
const param = `https%3A%2F%2F${cid}.ipfs.dweb.link/${path}`
const url = `https://proxy.com/?url=${param}`

assert.strictEqual(findCIDPathInURL(url), cidPath)
})

it('finds the plain cid (no /ipfs/ prefix) in an encoded query param', () => {
const cid = 'bafybeidrf56yzbkocajbloyafrebrdzsam3uj35sce2fdyo4elb6zzoily'
const url = `https://proxy.com/?cid=${cid}`

assert.strictEqual(findCIDPathInURL(url), cid)
})

it('finds the plain cidPath (no /ipfs/ prefix) in an encoded query param', () => {
const cidPath = 'bafybeidrf56yzbkocajbloyafrebrdzsam3uj35sce2fdyo4elb6zzoily/test/cat.png'
const url = `https://proxy.com/?cid=${cidPath}`

assert.strictEqual(findCIDPathInURL(url), cidPath)
})

it('returns null if cid not found', () => {
const url = 'https://example.com/hello/world.png'

const foundCidPath = getCidPathFromURL(url, cid)
assert.strictEqual(foundCidPath, cidPath)
assert.strictEqual(findCIDPathInURL(url), null)
})
})
Loading