Skip to content

Commit

Permalink
Plot Updates (#2)
Browse files Browse the repository at this point in the history
* update protein language models

* update the sizes of the Protein Language Models

* update protein_model with max/min dates and GBs

* update the links
  • Loading branch information
zachcp authored Jan 5, 2025
1 parent cbc93da commit 458f6a7
Show file tree
Hide file tree
Showing 5 changed files with 119 additions and 24 deletions.
29 changes: 29 additions & 0 deletions .github/workflows/links.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# Workflow: runs the lychee link checker over the repository and files an
# issue from its report when the checker reports a non-zero exit code.
name: Links

# Triggered by pushes to main and by pull requests targeting main.
on:
push:
branches: [main]
pull_request:
branches: [main]

jobs:
linkChecker:
runs-on: ubuntu-latest
permissions:
issues: write # required for peter-evans/create-issue-from-file
steps:
- uses: actions/checkout@v4

# The step id lets the later step read this step's outputs (exit_code).
- name: Link Checker
id: lychee
uses: lycheeverse/lychee-action@v2
with:
# NOTE(review): presumably keeps broken links from failing the job so the
# issue-creation step below can still run — confirm against lychee-action docs.
fail: false

# Only runs when the link checker step reported a non-zero exit code.
- name: Create Issue From File
if: steps.lychee.outputs.exit_code != 0
uses: peter-evans/create-issue-from-file@v5
with:
title: Link Checker Report
content-filepath: ./lychee/out.md
labels: report, automated issue
22 changes: 7 additions & 15 deletions src/components/hardware.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,8 @@ const parseDate = d3.timeParse("%b %Y");

function parseMemorySize(str) {
if (!str) return null;

const match = str.match(/^(\d+)\s*(KB|MB|GB|TB|PB)?$/i);
if (!match) return null;

const [, value, unit = "GB"] = match;
const multipliers = {
KB: 1 / 1024 / 1024, // Convert to GB
Expand All @@ -17,19 +15,17 @@ function parseMemorySize(str) {
TB: 1024, // Convert to GB
PB: 1024 * 1024, // Convert to GB
};

return Number(value) * multipliers[unit.toUpperCase()];
}

function munge_hardware(hardware) {
return hardware
export function hardware_plot(hardware, { width, height } = {}) {
let data = hardware
.map((d) => {
const parsedDate = parseDate(d["Release Date"]);
const parsedRam = parseMemorySize(d["Base RAM"]);

return {
y: parsedDate,
x: parsedRam,
x: parsedDate,
y: parsedRam,
model: d.Model,
};
})
Expand All @@ -40,21 +36,17 @@ function munge_hardware(hardware) {
}
return isValid;
});
}

export function hardware_plot(hardware, { width, height } = {}) {
let data = munge_hardware(hardware);

return Plot.plot({
y: {
grid: true,
label: "Release Date",
type: "time",
label: "RAM (GB)",
nice: true,
},
x: {
grid: true,
label: "RAM (GB)",
label: "Release Date",
type: "time",
nice: true,
},
marks: [
Expand Down
70 changes: 70 additions & 0 deletions src/components/protein_models.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import * as Plot from "npm:@observablehq/plot";
import * as d3 from "npm:d3";
const parseDate = d3.timeParse("%b %Y");

/**
 * Convert a human-readable size string (e.g. "~3GB", "200 MB", "1.5TB") into
 * a number of gigabytes.
 *
 * A leading "~" (approximate) marker is tolerated, the unit is
 * case-insensitive, and a bare number is interpreted as GB. Two literal
 * placeholder strings are handled explicitly: "Same as ESM2" maps to 3 and
 * "Available through repo" maps to null.
 *
 * @param {string} str - Raw size text.
 * @returns {number|null} Size in GB, or null when the input is empty or is
 *   not a recognisable size.
 */
function parseMemorySize(str) {
  if (!str) return null;

  // Surrounding whitespace is common in hand-edited CSV cells.
  const text = String(str).trim();
  if (text === "Same as ESM2") return 3; // Special case handling
  if (text === "Available through repo") return null;

  // Anchored at both ends so free text that merely ends in a digit is not
  // mistaken for a size, and the fractional part is captured so "1.5GB" is
  // read as 1.5 rather than as its trailing "5".
  const match = text.match(/^~?\s*(\d+(?:\.\d+)?)\s*(KB|MB|GB|TB|PB)?$/i);
  if (!match) return null;

  const [, value, unit = "GB"] = match;
  const multipliers = {
    KB: 1 / 1024 / 1024, // Convert to GB
    MB: 1 / 1024, // Convert to GB
    GB: 1, // Already in GB
    TB: 1024, // Convert to GB
    PB: 1024 * 1024, // Convert to GB
  };
  return Number(value) * multipliers[unit.toUpperCase()];
}

/**
 * Turn raw protein-model rows into plottable points.
 *
 * Each row's `Publication_Date` is passed through `parseDate` and its
 * `TotalWeightsSize` through `parseMemorySize`. Rows where either result is
 * null/undefined are dropped and reported on the console.
 *
 * @param {Array<Object>} models - Rows with `Name`, `Publication_Date` and
 *   `TotalWeightsSize` fields.
 * @returns {Array<{x: *, y: *, name: *}>} One point per row that parsed.
 */
function munge_protein_models(models) {
  return models.flatMap((row) => {
    const point = {
      x: parseDate(row.Publication_Date),
      y: parseMemorySize(row.TotalWeightsSize),
      name: row.Name,
    };

    // Loose comparison on purpose: rejects both null and undefined.
    if (point.x == null || point.y == null) {
      console.log("Filtered out entry:", point);
      return [];
    }
    return [point];
  });
}

/**
 * Build a scatter plot of protein model weight size against publication date,
 * with each point labelled by the model name.
 *
 * @param {Array<Object>} models - Raw model rows; they are normalised with
 *   `munge_protein_models` into `{x, y, name}` points before plotting.
 * @param {Object} [options]
 * @param {number} [options.width=800] - Plot width passed to `Plot.plot`.
 * @param {number} [options.height=400] - Plot height passed to `Plot.plot`.
 * @param {Array<Date>} [options.xDomain] - Date axis extent; defaults to
 *   2020-01-01 through 2024-12-31.
 * @param {Array<number>} [options.yDomain=[0, 4]] - Size axis extent in GB.
 * @returns {*} Whatever `Plot.plot` returns for the assembled specification.
 */
export function protein_model_plot(
  models,
  {
    width = 800,
    height = 400,
    xDomain = [new Date("2020-01-01"), new Date("2024-12-31")],
    yDomain = [0, 4],
  } = {},
) {
  const data = munge_protein_models(models);
  return Plot.plot({
    x: {
      grid: true,
      label: "Publication Date",
      type: "time",
      nice: true,
      domain: xDomain,
    },
    y: {
      grid: true,
      label: "Model Size (GB)",
      nice: true,
      domain: yDomain,
    },
    marks: [
      // The channels must be named explicitly: without x/y the dot mark
      // treats each datum as an [x, y] pair, which these objects are not.
      Plot.dot(data, { x: "x", y: "y" }),
      Plot.text(data, {
        x: "x",
        y: "y",
        text: "name",
        dy: -8,
      }),
    ],
    height,
    width,
  });
}
9 changes: 7 additions & 2 deletions src/data/protein_language_models.csv
Original file line number Diff line number Diff line change
@@ -1,2 +1,7 @@
Name, Publication, Publication_Date, Version, SourceURL, WeightURL, TotalWeigthsSize,
AlphaFold
Name,Publication,Publication_URL,Publication_Date,Version,SourceURL,WeightURL,TotalWeightsSize,Architecture,Training Data Size,License
AlphaFold2,"Highly accurate protein structure prediction with AlphaFold",https://pubmed.ncbi.nlm.nih.gov/34265844/,Jul 2021,v2.3.1,https://github.com/deepmind/alphafold,https://storage.googleapis.com/alphafold/,~3GB,Transformer-based,~170,000 structures (PDB),Apache 2.0
ESM2,"Language models of protein sequences at the scale of evolution enable accurate structure prediction",https://pubmed.ncbi.nlm.nih.gov/36477579/,Jan 2023,ESM-2,https://github.com/facebookresearch/esm,https://dl.fbaipublicfiles.com/fair-esm/models/,~3GB,Transformer,250M sequences,MIT
ESMFold,"High-accuracy protein structure prediction with language models",https://www.biorxiv.org/content/10.1101/2022.07.20.500902v2,Dec 2022,v1,https://github.com/facebookresearch/esm/tree/main/examples/esmfold,Same as ESM2,~3GB,Transformer + Structure Module,Based on ESM2,MIT
ProteinMPNN,"Neural network-based protein sequence design",https://pubmed.ncbi.nlm.nih.gov/36038635/,Jul 2022,v1,https://github.com/dauparas/ProteinMPNN,https://files.ipd.uw.edu/pub/training/weights/,~200MB,Message Passing Neural Network,PDB structures,MIT
OpenFold,"OpenFold: Retraining AlphaFold2 from scratch",https://www.biorxiv.org/content/10.1101/2022.11.20.517210v2,Sep 2022,v1.0,https://github.com/aqlaboratory/openfold,Available through repo,~3GB,Similar to AlphaFold2,Same as AlphaFold2,Apache 2.0
RoseTTAFold,"Accurate prediction of protein structures and interactions using a three-track neural network",https://pubmed.ncbi.nlm.nih.gov/34282049/,Jul 2021,v1,https://github.com/RosettaCommons/RoseTTAFold,Available through repo,~1GB,Three-track architecture,PDB + MSA data,MIT
13 changes: 6 additions & 7 deletions src/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,13 @@ toc: true

```js
const plms = FileAttachment("./data/protein_language_models.csv").csv();
import {protein_model_plot} from "./components/protein_models.js";

```

```js
plms
let plmplot = protein_model_plot(plms);
display(plmplot)
```


Expand All @@ -23,13 +26,9 @@ This is highlighting the release year and basic specs of available Desktop softa
```js
const hardware = FileAttachment("./data/desktop_hardware.csv").csv();
import {hardware_plot} from "./components/hardware.js";

```

```js
let hplot = hardware_plot(hardware);
```

```js
hplot
let hplot = hardware_plot(hardware);
display(hplot)
```

0 comments on commit 458f6a7

Please sign in to comment.