forked from NBISweden/single-cell_sib_scilifelab
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathlab.html
1220 lines (1008 loc) · 89.5 KB
/
lab.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta charset="utf-8" />
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="generator" content="pandoc" />
<meta name="author" content="NBIS | 17-Oct-2019" />
<title>protein RNA-Seq</title>
<script src="lab_files/jquery-1.11.3/jquery.min.js"></script>
<meta name="viewport" content="width=device-width, initial-scale=1" />
<link href="lab_files/bootstrap-3.3.5/css/flatly.min.css" rel="stylesheet" />
<script src="lab_files/bootstrap-3.3.5/js/bootstrap.min.js"></script>
<script src="lab_files/bootstrap-3.3.5/shim/html5shiv.min.js"></script>
<script src="lab_files/bootstrap-3.3.5/shim/respond.min.js"></script>
<script src="lab_files/jqueryui-1.11.4/jquery-ui.min.js"></script>
<link href="lab_files/tocify-1.9.1/jquery.tocify.css" rel="stylesheet" />
<script src="lab_files/tocify-1.9.1/jquery.tocify.js"></script>
<script src="lab_files/navigation-1.1/tabsets.js"></script>
<link href="lab_files/pagedtable-1.1/css/pagedtable.css" rel="stylesheet" />
<script src="lab_files/pagedtable-1.1/js/pagedtable.js"></script>
<link href="lab_files/font-awesome-5.1.0/css/all.css" rel="stylesheet" />
<link href="lab_files/font-awesome-5.1.0/css/v4-shims.css" rel="stylesheet" />
<link id="font-awesome-1-attachment" rel="attachment" href="lab_files/font-awesome-5.1.0/fonts/fontawesome-webfont.ttf"/>
<style type="text/css">code{white-space: pre;}</style>
<style type="text/css" data-origin="pandoc">
div.sourceCode { overflow-x: auto; }
table.sourceCode, tr.sourceCode, td.lineNumbers, td.sourceCode {
margin: 0; padding: 0; vertical-align: baseline; border: none; }
table.sourceCode { width: 100%; line-height: 100%; background-color: #f8f8f8; }
td.lineNumbers { text-align: right; padding-right: 4px; padding-left: 4px; color: #aaaaaa; border-right: 1px solid #aaaaaa; }
td.sourceCode { padding-left: 5px; }
pre, code { background-color: #f8f8f8; }
code > span.kw { color: #204a87; font-weight: bold; } /* Keyword */
code > span.dt { color: #204a87; } /* DataType */
code > span.dv { color: #0000cf; } /* DecVal */
code > span.bn { color: #0000cf; } /* BaseN */
code > span.fl { color: #0000cf; } /* Float */
code > span.ch { color: #4e9a06; } /* Char */
code > span.st { color: #4e9a06; } /* String */
code > span.co { color: #8f5902; font-style: italic; } /* Comment */
code > span.ot { color: #8f5902; } /* Other */
code > span.al { color: #ef2929; } /* Alert */
code > span.fu { color: #000000; } /* Function */
code > span.er { color: #a40000; font-weight: bold; } /* Error */
code > span.wa { color: #8f5902; font-weight: bold; font-style: italic; } /* Warning */
code > span.cn { color: #000000; } /* Constant */
code > span.sc { color: #000000; } /* SpecialChar */
code > span.vs { color: #4e9a06; } /* VerbatimString */
code > span.ss { color: #4e9a06; } /* SpecialString */
code > span.im { } /* Import */
code > span.va { color: #000000; } /* Variable */
code > span.cf { color: #204a87; font-weight: bold; } /* ControlFlow */
code > span.op { color: #ce5c00; font-weight: bold; } /* Operator */
code > span.pp { color: #8f5902; font-style: italic; } /* Preprocessor */
code > span.ex { } /* Extension */
code > span.at { color: #c4a000; } /* Attribute */
code > span.do { color: #8f5902; font-weight: bold; font-style: italic; } /* Documentation */
code > span.an { color: #8f5902; font-weight: bold; font-style: italic; } /* Annotation */
code > span.cv { color: #8f5902; font-weight: bold; font-style: italic; } /* CommentVar */
code > span.in { color: #8f5902; font-weight: bold; font-style: italic; } /* Information */
</style>
<script>
// apply pandoc div.sourceCode style to pre.sourceCode instead
(function() {
var sheets = document.styleSheets;
for (var i = 0; i < sheets.length; i++) {
if (sheets[i].ownerNode.dataset["origin"] !== "pandoc") continue;
try { var rules = sheets[i].cssRules; } catch (e) { continue; }
for (var j = 0; j < rules.length; j++) {
var rule = rules[j];
// check if there is a div.sourceCode rule
if (rule.type !== rule.STYLE_RULE || rule.selectorText !== "div.sourceCode") continue;
var style = rule.style.cssText;
// check if color or background-color is set
if (rule.style.color === '' || rule.style.backgroundColor === '') continue;
// replace div.sourceCode by a pre.sourceCode rule
sheets[i].deleteRule(j);
sheets[i].insertRule('pre.sourceCode{' + style + '}', j);
}
}
})();
</script>
<style type="text/css">
pre:not([class]) {
background-color: white;
}
</style>
<style type="text/css">
h1 {
font-size: 34px;
}
h1.title {
font-size: 38px;
}
h2 {
font-size: 30px;
}
h3 {
font-size: 24px;
}
h4 {
font-size: 18px;
}
h5 {
font-size: 16px;
}
h6 {
font-size: 12px;
}
.table th:not([align]) {
text-align: left;
}
</style>
<link rel="stylesheet" href="assets/lab.css" type="text/css" />
<style type = "text/css">
.main-container {
max-width: 940px;
margin-left: auto;
margin-right: auto;
}
code {
color: inherit;
background-color: rgba(0, 0, 0, 0.04);
}
img {
max-width:100%;
height: auto;
}
.tabbed-pane {
padding-top: 12px;
}
.html-widget {
margin-bottom: 20px;
}
button.code-folding-btn:focus {
outline: none;
}
summary {
display: list-item;
}
</style>
<!-- tabsets -->
<style type="text/css">
.tabset-dropdown > .nav-tabs {
display: inline-table;
max-height: 500px;
min-height: 44px;
overflow-y: auto;
background: white;
border: 1px solid #ddd;
border-radius: 4px;
}
.tabset-dropdown > .nav-tabs > li.active:before {
content: "";
font-family: 'Glyphicons Halflings';
display: inline-block;
padding: 10px;
border-right: 1px solid #ddd;
}
.tabset-dropdown > .nav-tabs.nav-tabs-open > li.active:before {
content: "";
border: none;
}
.tabset-dropdown > .nav-tabs.nav-tabs-open:before {
content: "";
font-family: 'Glyphicons Halflings';
display: inline-block;
padding: 10px;
border-right: 1px solid #ddd;
}
.tabset-dropdown > .nav-tabs > li.active {
display: block;
}
.tabset-dropdown > .nav-tabs > li > a,
.tabset-dropdown > .nav-tabs > li > a:focus,
.tabset-dropdown > .nav-tabs > li > a:hover {
border: none;
display: inline-block;
border-radius: 4px;
}
.tabset-dropdown > .nav-tabs.nav-tabs-open > li {
display: block;
float: none;
}
.tabset-dropdown > .nav-tabs > li {
display: none;
}
</style>
<script>
$(document).ready(function () {
window.buildTabsets("TOC");
});
$(document).ready(function () {
$('.tabset-dropdown > .nav-tabs > li').click(function () {
$(this).parent().toggleClass('nav-tabs-open')
});
});
</script>
<!-- code folding -->
<script>
$(document).ready(function () {
// move toc-ignore selectors from section div to header
$('div.section.toc-ignore')
.removeClass('toc-ignore')
.children('h1,h2,h3,h4,h5').addClass('toc-ignore');
// establish options
var options = {
selectors: "h1,h2,h3,h4",
theme: "bootstrap3",
context: '.toc-content',
hashGenerator: function (text) {
return text.replace(/[.\\/?&!#<>]/g, '').replace(/\s/g, '_').toLowerCase();
},
ignoreSelector: ".toc-ignore",
scrollTo: 0
};
options.showAndHide = true;
options.smoothScroll = true;
// tocify
var toc = $("#TOC").tocify(options).data("toc-tocify");
});
</script>
<style type="text/css">
#TOC {
margin: 25px 0px 20px 0px;
}
@media (max-width: 768px) {
#TOC {
position: relative;
width: 100%;
}
}
.toc-content {
padding-left: 30px;
padding-right: 40px;
}
div.main-container {
max-width: 1200px;
}
div.tocify {
width: 20%;
max-width: 260px;
max-height: 85%;
}
@media (min-width: 768px) and (max-width: 991px) {
div.tocify {
width: 25%;
}
}
@media (max-width: 767px) {
div.tocify {
width: 100%;
max-width: none;
}
}
.tocify ul, .tocify li {
line-height: 20px;
}
.tocify-subheader .tocify-item {
font-size: 0.90em;
}
.tocify .list-group-item {
border-radius: 0px;
}
</style>
</head>
<body>
<div class="container-fluid main-container">
<!-- setup 3col/9col grid for toc_float and main content -->
<div class="row-fluid">
<div class="col-xs-12 col-sm-4 col-md-3">
<div id="TOC" class="tocify">
</div>
</div>
<div class="toc-content col-xs-12 col-sm-8 col-md-9">
<div class="fluid-row" id="header">
<h1 class="title toc-ignore">protein RNA-Seq</h1>
<h3 class="subtitle">Protein RNA-Seq Labs</h3>
<h4 class="author"><b>NBIS</b> | 17-Oct-2019</h4>
</div>
<!-- rmd lab header -->
<!-- custom fonts -->
<p><link href="https://fonts.googleapis.com/css?family=Roboto|Source+Sans+Pro:300,400,600|Ubuntu+Mono&subset=latin-ext" rel="stylesheet"></p>
<p><img src="assets/logo.svg" alt="logo" class="trlogo"></p>
<p><br></p>
<!-- ------------ Only edit title, subtitle & author above this ------------ -->
<hr />
<div id="single-cell-rna-and-protein-analysis" class="section level2">
<h2><span class="header-section-number">0.1</span> Single cell RNA and protein analysis</h2>
<p>Single cell RNA-seq is a powerful approach to study the continually changing cellular transcriptome. The possibility of measuring thousands of RNA in each cell make it a strong tool differntiate cells. But most of the functions that are carried out in the cells are being made by proteins and not RNA. They are also more stable and and abundant in the cells and represent a different way of analysiing the data. Thus protein are more accurate molecules to represent the function of the cell. Therefore ways of measuring the protein levels in the cells is a great complementary resource to determine both differences between the cell and also to identify ongoing processes in the cells.</p>
<p>In this exercise we will look at two different ways of incorporating protein data with scRNA-seq data to facilitate the analysis of functions in the cell. The first one is based on SITE-seq data and means to incorporate surface protein data to evaluate the classification of cells carried out by scRNA seq analysis. We will also use the protein data itself to identify the different clusters. For more information regarding site seq see the presentation and links to papers there. The second one involves SPARC data and focuses on protein QC analysis and how SPARC data can be used to facilitate the identification of transcription factor targets.</p>
<div class="abstract">
<p><strong>Main exercise</strong></p>
<ul>
<li>01 SITE seq</li>
<li>02 SPARC</li>
</ul>
</div>
</div>
<div id="site-seq-exercise" class="section level1">
<h1><span class="header-section-number">1</span> SITE-seq exercise</h1>
<p>This exercise is bluntly stolen from the <a href="https://satijalab.org/seurat/v3.1/multimodal_vignette.html">Seurat home page</a>.</p>
<div id="data-description" class="section level2">
<h2><span class="header-section-number">1.1</span> Data description</h2>
<p>The data used in this exercise is from the paper: <strong>Stoeckius M, <em>et al</em>. “Simultaneous epitope and transcriptome measurement in single cells.” <a href="https://www.ncbi.nlm.nih.gov/pubmed/28759029">Nat Methods (2017)</a></strong>.</p>
</div>
<div id="load-in-the-data" class="section level2">
<h2><span class="header-section-number">1.2</span> Load in the data</h2>
<p>This vignette demonstrates new features that allow users to analyze and explore multi-modal data with Seurat. While this represents an initial release, we are excited to release significant new functionality for multi-modal datasets in the future.</p>
<p>Here, we analyze a dataset of 8,617 cord blood mononuclear cells (CBMCs), produced with CITE-seq, where we simultaneously measure the single cell transcriptomes alongside the expression of 11 surface proteins, whose levels are quantified with DNA-barcoded antibodies. First, we load in two count matrices : one for the RNA measurements, and one for the antibody-derived tags (ADT). You can download the ADT file <a href="ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE100nnn/GSE100866/suppl/GSE100866_CBMC_8K_13AB_10X-ADT_umi.csv.gz">here</a> and the RNA file <a href="ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE100nnn/GSE100866/suppl/GSE100866_CBMC_8K_13AB_10X-RNA_umi.csv.gz">here</a></p>
</div>
<div id="using-seurat-with-multi-modal-data" class="section level2">
<h2><span class="header-section-number">1.3</span> Using Seurat with multi-modal data</h2>
<p>At this point you need to put the data that you downloaded.</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">CBMC_rna_file =<span class="st"> "data/GSE100866_CBMC_8K_13AB_10X-RNA_umi.csv.gz"</span>
CBMC_SITEseq_file =<span class="st"> "data/GSE100866_CBMC_8K_13AB_10X-ADT_umi.csv.gz"</span> </code></pre></div>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># Load in the RNA UMI matrix</span>
<span class="co"># Note that this dataset also contains ~5% of mouse cells, which we can use as negative controls</span>
<span class="co"># for the protein measurements. For this reason, the gene expression matrix has HUMAN_ or MOUSE_</span>
<span class="co"># appended to the beginning of each gene.</span>
cbmc.rna <-<span class="st"> </span><span class="kw">as.sparse</span>(<span class="kw">read.csv</span>(<span class="dt">file =</span> CBMC_rna_file, <span class="dt">sep =</span> <span class="st">","</span>,
<span class="dt">header =</span> <span class="ot">TRUE</span>, <span class="dt">row.names =</span> <span class="dv">1</span>))
<span class="co"># To make life a bit easier going forward, we're going to discard all but the top 100 most</span>
<span class="co"># highly expressed mouse genes, and remove the 'HUMAN_' from the CITE-seq prefix</span>
cbmc.rna <-<span class="st"> </span><span class="kw">CollapseSpeciesExpressionMatrix</span>(cbmc.rna)
<span class="co"># Load in the ADT UMI matrix</span>
cbmc.adt <-<span class="st"> </span><span class="kw">as.sparse</span>(<span class="kw">read.csv</span>(<span class="dt">file =</span> CBMC_SITEseq_file, <span class="dt">sep =</span> <span class="st">","</span>,
<span class="dt">header =</span> <span class="ot">TRUE</span>, <span class="dt">row.names =</span> <span class="dv">1</span>))
<span class="co"># When adding multimodal data to Seurat, it's okay to have duplicate feature names. Each set of</span>
<span class="co"># modal data (eg. RNA, ADT, etc.) is stored in its own Assay object. One of these Assay objects</span>
<span class="co"># is called the 'default assay', meaning it's used for all analyses and visualization. To pull</span>
<span class="co"># data from an assay that isn't the default, you can specify a key that's linked to an assay for</span>
<span class="co"># feature pulling. To see all keys for all objects, use the Key function. Lastly, we observed</span>
<span class="co"># poor enrichments for CCR5, CCR7, and CD10 - and therefore remove them from the matrix</span>
<span class="co"># (optional)</span>
cbmc.adt <-<span class="st"> </span>cbmc.adt[<span class="kw">setdiff</span>(<span class="kw">rownames</span>(<span class="dt">x =</span> cbmc.adt), <span class="kw">c</span>(<span class="st">"CCR5"</span>, <span class="st">"CCR7"</span>, <span class="st">"CD10"</span>)), ]</code></pre></div>
<div id="setup-a-seurat-object-and-cluster-cells-based-on-rna-expression" class="section level3">
<h3><span class="header-section-number">1.3.1</span> Setup a Seurat object, and cluster cells based on RNA expression</h3>
<p>This you have done before during the course but if you want you can go to <a href="http://satijalab.org/seurat/pbmc3k_tutorial.html">Seurats PBMC clustering guided tutorial</a></p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">cbmc <-<span class="st"> </span><span class="kw">CreateSeuratObject</span>(<span class="dt">counts =</span> cbmc.rna)
<span class="co"># standard log-normalization</span>
cbmc <-<span class="st"> </span><span class="kw">NormalizeData</span>(cbmc)
<span class="co"># choose ~1k variable features</span>
cbmc <-<span class="st"> </span><span class="kw">FindVariableFeatures</span>(cbmc)
<span class="co"># standard scaling (no regression)</span>
cbmc <-<span class="st"> </span><span class="kw">ScaleData</span>(cbmc)
<span class="co"># Run PCA, select 13 PCs for tSNE visualization and graph-based clustering</span>
cbmc <-<span class="st"> </span><span class="kw">RunPCA</span>(cbmc, <span class="dt">verbose =</span> <span class="ot">FALSE</span>)
<span class="kw">ElbowPlot</span>(cbmc, <span class="dt">ndims =</span> <span class="dv">50</span>)</code></pre></div>
<p><img src="lab_files/figure-html/get%20clusters%20from%20RNA-1.svg" width="672" style="display: block; margin: auto auto auto 0;" /></p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">cbmc <-<span class="st"> </span><span class="kw">FindNeighbors</span>(cbmc, <span class="dt">dims =</span> <span class="dv">1</span>:<span class="dv">25</span>)
cbmc <-<span class="st"> </span><span class="kw">FindClusters</span>(cbmc, <span class="dt">resolution =</span> <span class="fl">0.8</span>)
cbmc <-<span class="st"> </span><span class="kw">RunTSNE</span>(cbmc, <span class="dt">dims =</span> <span class="dv">1</span>:<span class="dv">25</span>, <span class="dt">method =</span> <span class="st">"FIt-SNE"</span>)
<span class="co"># Find the markers that define each cluster, and use these to annotate the clusters, we use</span>
<span class="co"># max.cells.per.ident to speed up the process</span>
cbmc.rna.markers <-<span class="st"> </span><span class="kw">FindAllMarkers</span>(cbmc, <span class="dt">max.cells.per.ident =</span> <span class="dv">100</span>, <span class="dt">min.diff.pct =</span> <span class="fl">0.3</span>, <span class="dt">only.pos =</span> <span class="ot">TRUE</span>)</code></pre></div>
<pre><code>## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 8617
## Number of edges: 347548
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8592
## Number of communities: 19
## Elapsed time: 1 seconds</code></pre>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># Note, for simplicity we are merging two CD14+ Monocyte clusters (that differ in expression of</span>
<span class="co"># HLA-DR genes) and NK clusters (that differ in cell cycle stage)</span>
new.cluster.ids <-<span class="st"> </span><span class="kw">c</span>(<span class="st">"Memory CD4 T"</span>, <span class="st">"CD14+ Mono"</span>, <span class="st">"Naive CD4 T"</span>, <span class="st">"NK"</span>, <span class="st">"CD14+ Mono"</span>, <span class="st">"Mouse"</span>, <span class="st">"B"</span>,
<span class="st">"CD8 T"</span>, <span class="st">"CD16+ Mono"</span>, <span class="st">"T/Mono doublets"</span>, <span class="st">"NK"</span>, <span class="st">"CD34+"</span>, <span class="st">"Multiplets"</span>, <span class="st">"Mouse"</span>, <span class="st">"Eryth"</span>, <span class="st">"Mk"</span>,
<span class="st">"Mouse"</span>, <span class="st">"DC"</span>, <span class="st">"pDCs"</span>)
<span class="kw">names</span>(new.cluster.ids) <-<span class="st"> </span><span class="kw">levels</span>(cbmc)
cbmc <-<span class="st"> </span><span class="kw">RenameIdents</span>(cbmc, new.cluster.ids)</code></pre></div>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">DimPlot</span>(cbmc, <span class="dt">label =</span> <span class="ot">TRUE</span>) +<span class="st"> </span><span class="kw">NoLegend</span>()</code></pre></div>
<p><img src="lab_files/figure-html/get%20clusters%20from%20RNA%204-1.svg" width="672" style="display: block; margin: auto auto auto 0;" /></p>
</div>
<div id="add-the-protein-expression-levels-to-the-seurat-object" class="section level3">
<h3><span class="header-section-number">1.3.2</span> Add the protein expression levels to the Seurat object</h3>
<p>Seurat v3.0 allows you to store information from multiple assays in the same object, as long as the data is multi-modal (collected on the same set of cells). You can use the <em>SetAssayData</em> and <em>GetAssayData</em> accessor functions to add and fetch data from additional assays.</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># We will define an ADT assay, and store raw counts for it</span>
<span class="co"># If you are interested in how these data are internally stored, you can check out the Assay</span>
<span class="co"># class, which is defined in objects.R; note that all single-cell expression data, including RNA</span>
<span class="co"># data, are still stored in Assay objects, and can also be accessed using GetAssayData</span>
cbmc[[<span class="st">"ADT"</span>]] <-<span class="st"> </span><span class="kw">CreateAssayObject</span>(<span class="dt">counts =</span> cbmc.adt)
<span class="co"># Now we can repeat the preprocessing (normalization and scaling) steps that we typically run</span>
<span class="co"># with RNA, but modifying the 'assay' argument. For CITE-seq data, we do not recommend typical</span>
<span class="co"># LogNormalization. Instead, we use a centered log-ratio (CLR) normalization, computed</span>
<span class="co"># independently for each feature. This is a slightly improved procedure from the original</span>
<span class="co"># publication, and we will release more advanced versions of CITE-seq normalizations soon.</span>
cbmc <-<span class="st"> </span><span class="kw">NormalizeData</span>(cbmc, <span class="dt">assay =</span> <span class="st">"ADT"</span>, <span class="dt">normalization.method =</span> <span class="st">"CLR"</span>)
cbmc <-<span class="st"> </span><span class="kw">ScaleData</span>(cbmc, <span class="dt">assay =</span> <span class="st">"ADT"</span>)</code></pre></div>
</div>
<div id="visualize-protein-levels-on-rna-clusters" class="section level3">
<h3><span class="header-section-number">1.3.3</span> Visualize protein levels on RNA clusters</h3>
<p>You can use the names of any ADT markers, (i.e. adt_CD4), in FetchData, FeaturePlot, RidgePlot, FeatureScatter, DoHeatmap, or any other <a href="http://satijalab.org/seurat/visualization_vignette.html">visualization features</a></p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># in this plot, protein (ADT) levels are on top, and RNA levels are on the bottom</span>
F =<span class="st"> </span><span class="kw">FeaturePlot</span>(cbmc, <span class="dt">features =</span> <span class="kw">c</span>(<span class="st">"adt_CD3"</span>, <span class="st">"adt_CD11c"</span>, <span class="st">"adt_CD8"</span>, <span class="st">"adt_CD16"</span>, <span class="st">"CD3E"</span>, <span class="st">"ITGAX"</span>, <span class="st">"CD8A"</span>,
<span class="st">"FCGR3A"</span>), <span class="dt">min.cutoff =</span> <span class="st">"q05"</span>, <span class="dt">max.cutoff =</span> <span class="st">"q95"</span>, <span class="dt">ncol =</span> <span class="dv">4</span>, <span class="dt">combine=</span><span class="ot">FALSE</span>)
for(i in <span class="dv">1</span>:<span class="kw">length</span>(F)) {
F[[i]] <-<span class="st"> </span>F[[i]] +<span class="st"> </span><span class="kw">NoLegend</span>()
}
cowplot::<span class="kw">plot_grid</span>(<span class="dt">plotlist =</span> F, <span class="dt">ncol =</span> <span class="dv">4</span>)</code></pre></div>
<p><img src="lab_files/figure-html/Visualize%20protein%20expression%20levels%201-1.svg" width="672" style="display: block; margin: auto auto auto 0;" /></p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">RidgePlot</span>(cbmc, <span class="dt">features =</span> <span class="kw">c</span>(<span class="st">"adt_CD3"</span>, <span class="st">"adt_CD11c"</span>, <span class="st">"adt_CD8"</span>, <span class="st">"adt_CD16"</span>), <span class="dt">ncol =</span> <span class="dv">2</span>)</code></pre></div>
<p><img src="lab_files/figure-html/Visualize%20protein%20expression%20levels%202-1.svg" width="672" style="display: block; margin: auto auto auto 0;" /></p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># Draw ADT scatter plots (like biaxial plots for FACS). Note that you can even 'gate' cells if</span>
<span class="co"># desired by using HoverLocator and FeatureLocator</span>
<span class="kw">FeatureScatter</span>(cbmc, <span class="dt">feature1 =</span> <span class="st">"adt_CD19"</span>, <span class="dt">feature2 =</span> <span class="st">"adt_CD3"</span>)</code></pre></div>
<p><img src="lab_files/figure-html/Visualize%20protein%20expression%20levels%203-1.svg" width="672" style="display: block; margin: auto auto auto 0;" /></p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># view relationship between protein and RNA</span>
<span class="kw">FeatureScatter</span>(cbmc, <span class="dt">feature1 =</span> <span class="st">"adt_CD3"</span>, <span class="dt">feature2 =</span> <span class="st">"CD3E"</span>)</code></pre></div>
<p><img src="lab_files/figure-html/Visualize%20protein%20expression%20levels%204-1.svg" width="672" style="display: block; margin: auto auto auto 0;" /></p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># Let's plot CD4 vs CD8 levels in T cells</span>
tcells <-<span class="st"> </span><span class="kw">subset</span>(cbmc, <span class="dt">idents =</span> <span class="kw">c</span>(<span class="st">"Naive CD4 T"</span>, <span class="st">"Memory CD4 T"</span>, <span class="st">"CD8 T"</span>))
<span class="kw">FeatureScatter</span>(tcells, <span class="dt">feature1 =</span> <span class="st">"adt_CD4"</span>, <span class="dt">feature2 =</span> <span class="st">"adt_CD8"</span>)</code></pre></div>
<p><img src="lab_files/figure-html/Visualize%20protein%20expression%20levels%205-1.svg" width="672" style="display: block; margin: auto auto auto 0;" /></p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># # Let's look at the raw (non-normalized) ADT counts. You can see the values are quite high,</span>
<span class="co"># particularly in comparison to RNA values. This is due to the significantly higher protein copy</span>
<span class="co"># number in cells, which significantly reduces 'drop-out' in ADT data</span>
<span class="kw">FeatureScatter</span>(tcells, <span class="dt">feature1 =</span> <span class="st">"adt_CD4"</span>, <span class="dt">feature2 =</span> <span class="st">"adt_CD8"</span>, <span class="dt">slot =</span> <span class="st">"counts"</span>)</code></pre></div>
<p><img src="lab_files/figure-html/Visualize%20protein%20expression%20level%206-1.svg" width="672" style="display: block; margin: auto auto auto 0;" /> If you look a bit more closely, you will see that our CD8 T cell cluster is enriched for CD8 T cells, but still contains many CD4+ CD8- T cells. This is because Naive CD4 and CD8 T cells are quite similar transcriptomically, and the RNA dropout levels for CD4 and CD8 are quite high. This demonstrates the challenge of defining subtle immune cell differences from scRNA-seq data alone.</p>
</div>
<div id="identify-differentially-expressed-proteins-between-clusters" class="section level3">
<h3><span class="header-section-number">1.3.4</span> Identify differentially expressed proteins between clusters</h3>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># Downsample the clusters to a maximum of 300 cells each (makes the heatmap easier to see for</span>
<span class="co"># small clusters)</span>
cbmc.small <-<span class="st"> </span><span class="kw">subset</span>(cbmc, <span class="dt">downsample =</span> <span class="dv">300</span>)
<span class="co"># Find protein markers for all clusters, and draw a heatmap</span>
adt.markers <-<span class="st"> </span><span class="kw">FindAllMarkers</span>(cbmc.small, <span class="dt">assay =</span> <span class="st">"ADT"</span>, <span class="dt">only.pos =</span> <span class="ot">TRUE</span>)
<span class="kw">DoHeatmap</span>(cbmc.small, <span class="dt">features =</span> <span class="kw">unique</span>(adt.markers$gene), <span class="dt">assay =</span> <span class="st">"ADT"</span>, <span class="dt">angle =</span> <span class="dv">90</span>) +<span class="st"> </span><span class="kw">NoLegend</span>()</code></pre></div>
<p><img src="lab_files/figure-html/Identify%20differentailly%202-1.svg" width="672" style="display: block; margin: auto auto auto 0;" /></p>
<p>You can see that our unknown cells co-express both myeloid and lymphoid markers (true at the RNA level as well). They are likely cell clumps (multiplets) that should be discarded. We’ll remove the mouse cells now as well</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">cbmc <-<span class="st"> </span><span class="kw">subset</span>(cbmc, <span class="dt">idents =</span> <span class="kw">c</span>(<span class="st">"Multiplets"</span>, <span class="st">"Mouse"</span>), <span class="dt">invert =</span> <span class="ot">TRUE</span>)</code></pre></div>
</div>
<div id="cluster-directly-on-protein-levels" class="section level3">
<h3><span class="header-section-number">1.3.5</span> Cluster directly on protein levels</h3>
<p>You can also run dimensional reduction and graph-based clustering directly on CITE-seq data</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># Because we're going to be working with the ADT data extensively, we're going to switch the</span>
<span class="co"># default assay to the 'CITE' assay. This will cause all functions to use ADT data by default,</span>
<span class="co"># rather than requiring us to specify it each time</span>
<span class="kw">DefaultAssay</span>(cbmc) <-<span class="st"> "ADT"</span>
cbmc <-<span class="st"> </span><span class="kw">RunPCA</span>(cbmc, <span class="dt">features =</span> <span class="kw">rownames</span>(cbmc), <span class="dt">reduction.name =</span> <span class="st">"pca_adt"</span>, <span class="dt">reduction.key =</span> <span class="st">"pca_adt_"</span>,
<span class="dt">verbose =</span> <span class="ot">FALSE</span>)
<span class="kw">DimPlot</span>(cbmc, <span class="dt">reduction =</span> <span class="st">"pca_adt"</span>)</code></pre></div>
<p><img src="lab_files/figure-html/Cluster%20directly%20on%20protein%20levels-1.svg" width="672" style="display: block; margin: auto auto auto 0;" /></p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># Since we only have 10 markers, instead of doing PCA, we'll just use a standard euclidean</span>
<span class="co"># distance matrix here. Also, this provides a good opportunity to demonstrate how to do</span>
<span class="co"># visualization and clustering using a custom distance matrix in Seurat</span>
adt.data <-<span class="st"> </span><span class="kw">GetAssayData</span>(cbmc, <span class="dt">slot =</span> <span class="st">"data"</span>)
adt.dist <-<span class="st"> </span><span class="kw">dist</span>(<span class="kw">t</span>(adt.data))
<span class="co"># Before we recluster the data on ADT levels, we'll stash the RNA cluster IDs for later</span>
cbmc[[<span class="st">"rnaClusterID"</span>]] <-<span class="st"> </span><span class="kw">Idents</span>(cbmc)
<span class="co"># Now, we rerun tSNE using our distance matrix defined only on ADT (protein) levels.</span>
cbmc[[<span class="st">"tsne_adt"</span>]] <-<span class="st"> </span><span class="kw">RunTSNE</span>(adt.dist, <span class="dt">assay =</span> <span class="st">"ADT"</span>, <span class="dt">reduction.key =</span> <span class="st">"adtTSNE_"</span>)
cbmc[[<span class="st">"adt_snn"</span>]] <-<span class="st"> </span><span class="kw">FindNeighbors</span>(adt.dist)$snn
cbmc <-<span class="st"> </span><span class="kw">FindClusters</span>(cbmc, <span class="dt">resolution =</span> <span class="fl">0.2</span>, <span class="dt">graph.name =</span> <span class="st">"adt_snn"</span>)
<span class="co"># We can compare the RNA and protein clustering, and use this to annotate the protein clustering</span>
<span class="co"># (we could also of course use FindMarkers)</span>
clustering.table <-<span class="st"> </span><span class="kw">table</span>(<span class="kw">Idents</span>(cbmc), cbmc$rnaClusterID)
clustering.table</code></pre></div>
<pre><code>## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 7895
## Number of edges: 258146
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9491
## Number of communities: 11
## Elapsed time: 0 seconds
##
## Memory CD4 T CD14+ Mono Naive CD4 T NK B CD8 T CD16+ Mono
## 0 1754 0 1217 29 0 27 0
## 1 0 2189 0 4 0 0 30
## 2 3 0 2 890 3 1 0
## 3 0 4 0 2 319 0 2
## 4 24 0 18 4 1 243 0
## 5 1 27 4 157 2 2 10
## 6 4 5 0 1 0 0 0
## 7 4 59 4 0 0 0 9
## 8 0 9 0 2 0 0 179
## 9 0 0 1 0 0 0 0
## 10 1 0 2 0 25 0 0
##
## T/Mono doublets CD34+ Eryth Mk DC pDCs
## 0 5 2 4 24 1 2
## 1 1 1 5 25 55 0
## 2 0 1 3 7 2 1
## 3 0 2 2 3 0 0
## 4 0 0 1 2 0 0
## 5 56 0 9 16 6 2
## 6 1 113 81 16 5 0
## 7 117 0 0 2 0 1
## 8 0 0 0 1 0 0
## 9 0 0 0 0 1 43
## 10 2 0 0 0 0 0</code></pre>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">new.cluster.ids <-<span class="st"> </span><span class="kw">c</span>(<span class="st">"CD4 T"</span>, <span class="st">"CD14+ Mono"</span>, <span class="st">"NK"</span>, <span class="st">"B"</span>, <span class="st">"CD8 T"</span>, <span class="st">"NK"</span>, <span class="st">"CD34+"</span>, <span class="st">"T/Mono doublets"</span>,
<span class="st">"CD16+ Mono"</span>, <span class="st">"pDCs"</span>, <span class="st">"B"</span>)
<span class="kw">names</span>(new.cluster.ids) <-<span class="st"> </span><span class="kw">levels</span>(cbmc)
cbmc <-<span class="st"> </span><span class="kw">RenameIdents</span>(cbmc, new.cluster.ids)
tsne_rnaClusters <-<span class="st"> </span><span class="kw">DimPlot</span>(cbmc, <span class="dt">reduction =</span> <span class="st">"tsne_adt"</span>, <span class="dt">group.by =</span> <span class="st">"rnaClusterID"</span>) +<span class="st"> </span><span class="kw">NoLegend</span>()
tsne_rnaClusters <-<span class="st"> </span>tsne_rnaClusters +<span class="st"> </span><span class="kw">ggtitle</span>(<span class="st">"Clustering based on scRNA-seq"</span>) +<span class="st"> </span><span class="kw">theme</span>(<span class="dt">plot.title =</span> <span class="kw">element_text</span>(<span class="dt">hjust =</span> <span class="fl">0.5</span>))
tsne_rnaClusters <-<span class="st"> </span><span class="kw">LabelClusters</span>(<span class="dt">plot =</span> tsne_rnaClusters, <span class="dt">id =</span> <span class="st">"rnaClusterID"</span>, <span class="dt">size =</span> <span class="dv">4</span>)
tsne_adtClusters <-<span class="st"> </span><span class="kw">DimPlot</span>(cbmc, <span class="dt">reduction =</span> <span class="st">"tsne_adt"</span>, <span class="dt">pt.size =</span> <span class="fl">0.5</span>) +<span class="st"> </span><span class="kw">NoLegend</span>()
tsne_adtClusters <-<span class="st"> </span>tsne_adtClusters +<span class="st"> </span><span class="kw">ggtitle</span>(<span class="st">"Clustering based on ADT signal"</span>) +<span class="st"> </span><span class="kw">theme</span>(<span class="dt">plot.title =</span> <span class="kw">element_text</span>(<span class="dt">hjust =</span> <span class="fl">0.5</span>))
tsne_adtClusters <-<span class="st"> </span><span class="kw">LabelClusters</span>(<span class="dt">plot =</span> tsne_adtClusters, <span class="dt">id =</span> <span class="st">"ident"</span>, <span class="dt">size =</span> <span class="dv">4</span>)
<span class="co"># Note: for this comparison, both the RNA and protein clustering are visualized on a tSNE</span>
<span class="co"># generated using the ADT distance matrix.</span>
<span class="kw">CombinePlots</span>(<span class="dt">plots =</span> <span class="kw">list</span>(tsne_rnaClusters, tsne_adtClusters), <span class="dt">ncol =</span> <span class="dv">2</span>)</code></pre></div>
<p><img src="lab_files/figure-html/Cluster%20directly%20on%20protein%20levels%203-1.svg" width="672" style="display: block; margin: auto auto auto 0;" /></p>
<p>The ADT-based clustering yields similar results, but with a few differences</p>
<ul>
<li>Clustering is improved for CD4/CD8 T cell populations, based on the robust ADT data for CD4, CD8, CD14, and CD45RA</li>
<li>However, some clusters for which the ADT data does not contain good distinguishing protein markers (i.e. Mk/Ery/DC) lose separation</li>
</ul>
<p>You can verify this using FindMarkers at the RNA level, as well</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">tcells <-<span class="st"> </span><span class="kw">subset</span>(cbmc, <span class="dt">idents =</span> <span class="kw">c</span>(<span class="st">"CD4 T"</span>, <span class="st">"CD8 T"</span>))
<span class="kw">FeatureScatter</span>(tcells, <span class="dt">feature1 =</span> <span class="st">"CD4"</span>, <span class="dt">feature2 =</span> <span class="st">"CD8"</span>)</code></pre></div>
<p><img src="lab_files/figure-html/Cluster%20directly%20on%20protein%20levels%204-1.svg" width="672" style="display: block; margin: auto auto auto 0;" /></p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">RidgePlot</span>(cbmc, <span class="dt">features =</span> <span class="kw">c</span>(<span class="st">"adt_CD11c"</span>, <span class="st">"adt_CD8"</span>, <span class="st">"adt_CD16"</span>, <span class="st">"adt_CD4"</span>, <span class="st">"adt_CD19"</span>, <span class="st">"adt_CD14"</span>),
<span class="dt">ncol =</span> <span class="dv">2</span>)</code></pre></div>
<p><img src="lab_files/figure-html/Cluster%20directly%20on%20protein%20levels%205-1.svg" width="672" style="display: block; margin: auto auto auto 0;" /></p>
</div>
</div>
</div>
<div id="sparc-seq-exercise" class="section level1">
<h1><span class="header-section-number">2</span> SPARC-seq exercise</h1>
<div id="load-in-the-data-1" class="section level2">
<h2><span class="header-section-number">2.1</span> Load in the data</h2>
<p>This lab demonstrates new features that allow users to analyze and explore multi-modal data from SPARC.</p>
<p>In this exercise we will analyse 256 single cells were RNA-seq data and protein data for 90 intracellular proteins have been generated per cell. The cells are primed embryonic stem cells from human and have been taken at 0, 24 and 48 hours after being stable stem cells. More information about the protocol and the data can be found in the article.</p>
<p>At this point you need to put the data that you downloaded in a folder and point to it.</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">SPARC_protein_RNA_file =<span class="st"> "data/SPARC_RNA_protein.tsv.gz"</span>
SPARC_RNA_RPKM_file =<span class="st"> "data/SPARC.RNA.lrpkm.tsv.gz"</span>
POUF5I_chipSeq_file =<span class="st"> "data/POUF5I_chipSeq_file.tsv"</span>
cbPalette <-<span class="st"> </span><span class="kw">c</span>(<span class="st">"#FF0000"</span>,<span class="st">"#0000FF"</span>, <span class="st">"#0072B2"</span>, <span class="st">"#F0E442"</span> , <span class="st">"#CC79A7"</span>, <span class="st">"#999999"</span>, <span class="st">"#E69F00"</span>, <span class="st">"#56B4E9"</span>)
timePalette <-<span class="st"> </span><span class="kw">c</span>(<span class="st">"#009E73"</span>,<span class="st">"#D55E00"</span>, <span class="st">"#0072B2"</span>,<span class="st">"#000000"</span>, <span class="st">"#F0E442"</span> , <span class="st">"#CC79A7"</span>, <span class="st">"#E69F00"</span>, <span class="st">"#56B4E9"</span>)
moleculePalette <-<span class="st"> </span><span class="kw">c</span>(<span class="st">"#FF0000"</span>,<span class="st">"#0000FF"</span>)
cellCyclePallete <-<span class="kw">c</span>(<span class="st">"#999999"</span> , <span class="st">"#CC79A7"</span>, <span class="st">"#E69F00"</span>)
proteinExamples =<span class="st"> </span><span class="kw">c</span>(<span class="st">"SOX2"</span>, <span class="st">"POU5F1"</span>,<span class="st">"EPCAM"</span>, <span class="st">"TP53"</span>)</code></pre></div>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># Load in the RNA and protein normalised data set</span>
SPARC.data<-<span class="st"> </span><span class="kw">read.table</span>(SPARC_protein_RNA_file, <span class="dt">header =</span> T, <span class="dt">sep =</span> <span class="st">"</span><span class="ch">\t</span><span class="st">"</span>)
<span class="kw">summary</span>(SPARC.data)
<span class="co"># Check what the file contains.</span>
sampleInfoColums =<span class="st"> </span><span class="kw">colnames</span>(SPARC.data)[<span class="kw">c</span>(<span class="dv">2</span>,<span class="dv">3</span>,<span class="dv">4</span>,<span class="dv">8</span>,<span class="dv">9</span>,<span class="dv">11</span>,<span class="dv">12</span>,<span class="dv">13</span>:<span class="dv">23</span>)]
sampleInfo =<span class="st"> </span>SPARC.data %>%<span class="st"> </span><span class="kw">select</span>(sampleInfoColums) %>%<span class="st"> </span>dplyr::<span class="kw">distinct</span>(sample, <span class="dt">.keep_all=</span> <span class="ot">TRUE</span>) </code></pre></div>
<pre><code>## geneID sample type time
## ABL1 : 244 Comb_0h_FACS_G12: 91 FACS: 546 0h :7644
## ADAM9 : 244 Comb_0h_FACS_H12: 91 sc :21658 24h:6734
## ALCAM : 244 Comb_0h_sc_A10 : 91 48h:7826
## ANXA1 : 244 Comb_0h_sc_A11 : 91
## APP : 244 Comb_0h_sc_A12 : 91
## AURKA : 244 Comb_0h_sc_A2 : 91
## (Other):20740 (Other) :21658
## Cq proteinsPerGene CqSumPerGene CqSumPerSample
## Min. : 0.0000 Min. : 1.0 Min. : 0.0195 Min. : 22.30
## 1st Qu.: 0.0000 1st Qu.: 29.0 1st Qu.: 19.5534 1st Qu.: 41.43
## Median : 0.0000 Median : 92.0 Median : 48.1026 Median : 49.24
## Mean : 0.6277 Mean :135.7 Mean : 183.8454 Mean : 57.13
## 3rd Qu.: 0.6221 3rd Qu.:266.0 3rd Qu.: 208.1483 3rd Qu.: 57.18
## Max. :14.6423 Max. :328.0 Max. :2321.8301 Max. :407.72
##
## proteinsPerSample RPKM RPKM_mean RNAsPerSample
## Min. :18.00 Min. :0.000 Min. :0.7065 Min. :3315
## 1st Qu.:36.00 1st Qu.:0.000 1st Qu.:1.5393 1st Qu.:5306
## Median :40.00 Median :0.437 Median :2.4608 Median :5963
## Mean :41.84 Mean :1.506 Mean :2.5157 Mean :5939
## 3rd Qu.:46.00 3rd Qu.:2.952 3rd Qu.:3.1987 3rd Qu.:6613
## Max. :85.00 Max. :6.596 Max. :5.5706 Max. :8206
## NA's :546
## CCcyclone cyclone_G1 cyclone_G2M cyclone_S
## G1 : 273 Min. :0.0000 Min. :0.0000 Min. :0.0000
## G2M : 9555 1st Qu.:0.0000 1st Qu.:0.0390 1st Qu.:0.0280
## S :11830 Median :0.0030 Median :0.3040 Median :0.4000
## NA's: 546 Mean :0.0503 Mean :0.4392 Mean :0.4339
## 3rd Qu.:0.0410 3rd Qu.:0.9000 3rd Qu.:0.8580
## Max. :0.6960 Max. :1.0000 Max. :1.0000
## NA's :546 NA's :546 NA's :546
## CCseurat seurat_S seurat_G2M RNA_seurat_tSNE_1
## G1 : 1729 Min. :-0.3772 Min. :-0.2813 Min. :-17.2206
## G2M :10192 1st Qu.:-0.0298 1st Qu.:-0.0257 1st Qu.: -8.3883
## S : 9737 Median : 0.1113 Median : 0.1334 Median : 2.8253
## NA's: 546 Mean : 0.1014 Mean : 0.1738 Mean : -0.0394
## 3rd Qu.: 0.2425 3rd Qu.: 0.3316 3rd Qu.: 6.7580
## Max. : 0.5795 Max. : 0.9882 Max. : 12.4551
## NA's :546 NA's :546 NA's :546
## RNA_tSNE_2 RNA_tSNE_3 RNA_seurat_pseudoTime
## Min. :-23.9167 Min. :-10.3168 Min. : 0.000
## 1st Qu.:-13.1557 1st Qu.: -5.0088 1st Qu.: 6.847
## Median : 5.2974 Median : 0.7028 Median :20.984
## Mean : -0.0689 Mean : -0.0566 Mean :23.033
## 3rd Qu.: 9.6313 3rd Qu.: 4.2409 3rd Qu.:39.357
## Max. : 17.0882 Max. : 10.4958 Max. :48.000
## NA's :546 NA's :546 NA's :546</code></pre>
</div>
<div id="initial-filtering-and-qc" class="section level2">
<h2><span class="header-section-number">2.2</span> Initial filtering and QC</h2>
<p>We have generated data for 100 cells and it should be a correlation between the expression of 100 cells and the expression of 100 single cells.</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># remove all genes that does not have good representation in the FACS sorted 100 cells </span>
<span class="co">#Keep only FACS data</span>
SPARC.data.FACS =<span class="st"> </span>SPARC.data[SPARC.data$type ==<span class="st"> "FACS"</span>, ] %>%<span class="st"> </span><span class="kw">select</span>(geneID, time,Cq)
<span class="co"># Create similair data using sc data by calculating mean multiply qith 100 and divide by number of samples</span>
SPARC.data.sc =<span class="st"> </span>SPARC.data[SPARC.data$type !=<span class="st"> "FACS"</span>, ]
SPARC.data.sc.summarize =<span class="st"> </span>SPARC.data.sc %>%<span class="st"> </span>dplyr::<span class="kw">group_by</span>(geneID, time,type) %>%
<span class="st"> </span>dplyr::<span class="kw">summarize</span>(<span class="dt">Cq_sc =</span> <span class="kw">sum</span>(Cq)*<span class="dv">100</span> /<span class="kw">n</span>() )
<span class="co"># Merge the two too see how many that correlate well. </span>
test =<span class="st"> </span><span class="kw">merge</span>(SPARC.data.FACS,<span class="kw">as.data.frame</span>(SPARC.data.sc.summarize) ,<span class="dt">by =</span> <span class="kw">c</span>(<span class="st">"geneID"</span>, <span class="st">"time"</span>))
<span class="kw">ggplot</span>(<span class="dt">data =</span> test, <span class="dt">mapping =</span> <span class="kw">aes</span>(Cq_sc,Cq))+<span class="st"> </span><span class="kw">geom_point</span>()</code></pre></div>
<p><img src="lab_files/figure-html/Initial%20analysis%20on%20protein%20data-1.svg" width="672" style="display: block; margin: auto auto auto 0;" /> As shown by the plot there is a correlation between the single cells and the FACS data if the FACS expression level is high but not if the FACS expression level is low.</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># Keep all proteins where there is at least one FACS sample with Cq > 3</span>
geneID_QC_FACS =<span class="st"> </span><span class="kw">as.character</span>(<span class="kw">unique</span>(SPARC.data.FACS$geneID[SPARC.data.FACS$Cq ><span class="st"> </span><span class="dv">3</span> ]))
<span class="co"># remove all genes where the logged RPKM is less than 1</span>
geneID_QC_RPKM =<span class="st"> </span><span class="kw">as.character</span>(<span class="kw">unique</span>(SPARC.data$geneID[SPARC.data$RPKM_mean ><span class="st"> </span><span class="dv">1</span> ]))
<span class="co"># Keep only the genes that is found in both sets</span>
geneID_QC =<span class="st"> </span><span class="kw">intersect</span>(geneID_QC_FACS,geneID_QC_RPKM)
<span class="co">#Create QC data set</span>
SPARC.data.QC =<span class="st"> </span>SPARC.data[SPARC.data$geneID %in%<span class="st"> </span>geneID_QC, ]</code></pre></div>
<p>Now we have a data set with genes</p>
</div>
<div id="correlation-between-the-rna-and-the-protein-expression-levels-in-the-cell." class="section level2">
<h2><span class="header-section-number">2.3</span> Correlation between the RNA and the protein expression levels in the cell.</h2>
<p>As a first glimpse of the data and how it changes over time is to view how the different proteins changes over time.</p>
<div id="produce-violin-plot-for-both-data" class="section level3">
<h3><span class="header-section-number">2.3.1</span> Produce violin plot for both data</h3>
<p>Here we can compare if the data that we get for protein and RNA and separate them over time. The dots in the violin plots are the FACS results.</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">## Plot function used in the sample
plotExpressionViolinPlot <-<span class="st"> </span>function(expressionInfoGene,geneID ,moleculePalette, <span class="dt">legend =</span> <span class="ot">FALSE</span>){
plot =<span class="st"> </span><span class="kw">ggplot</span>(expressionInfoGene[expressionInfoGene$type ==<span class="st"> "sc"</span>,],
<span class="kw">aes</span> (<span class="dt">x =</span> time, <span class="dt">y =</span> expression, <span class="dt">fill =</span> molecule)
)+<span class="st"> </span>
<span class="st"> </span><span class="kw">geom_violin</span>(<span class="dt">size =</span> <span class="fl">0.5</span>)+
<span class="st"> </span><span class="kw">scale_fill_manual</span>(<span class="dt">values=</span>moleculePalette)+
<span class="st"> </span><span class="kw">geom_point</span>(<span class="dt">data =</span>expressionInfoGene[expressionInfoGene$type ==<span class="st"> "FACS"</span>,],
<span class="kw">aes</span> (<span class="dt">x =</span> time, <span class="dt">y =</span> expression, <span class="dt">fill =</span> molecule),<span class="dt">size =</span> <span class="fl">0.5</span>) +<span class="st"> </span>
<span class="st"> </span><span class="kw">facet_grid</span>(<span class="dt">rows =</span> <span class="st">"molecule"</span>)
if(!legend){
plot =<span class="st"> </span>plot +<span class="st"> </span><span class="kw">theme</span>(<span class="dt">legend.position=</span><span class="st">"none"</span>)
}
plot =<span class="st"> </span>plot +<span class="st"> </span><span class="kw">ggtitle</span>(geneID)+<span class="kw">theme</span>(<span class="dt">axis.text.x =</span> <span class="kw">element_text</span>(<span class="dt">angle =</span> <span class="dv">90</span>, <span class="dt">hjust =</span> <span class="dv">1</span>), <span class="dt">text =</span> <span class="kw">element_text</span>(<span class="dt">size=</span><span class="dv">7</span>))+
<span class="st"> </span><span class="kw">theme</span>(<span class="dt">axis.text.x =</span> <span class="kw">element_text</span>(<span class="dt">size =</span> <span class="dv">7</span>, <span class="dt">angle =</span> <span class="dv">90</span>, <span class="dt">hjust =</span> <span class="dv">1</span>, <span class="dt">vjust =</span> .<span class="dv">5</span>),
<span class="dt">axis.text.y =</span> <span class="kw">element_text</span>(<span class="dt">size =</span> <span class="dv">7</span>, <span class="dt">angle =</span> <span class="dv">0</span>),
<span class="dt">axis.title.x =</span> <span class="kw">element_text</span>(<span class="dt">size =</span> <span class="dv">8</span>),
<span class="dt">axis.title.y =</span> <span class="kw">element_text</span>(<span class="dt">size =</span> <span class="dv">8</span>),
<span class="dt">plot.title =</span> <span class="kw">element_text</span>(<span class="dt">size=</span><span class="dv">10</span>))
<span class="kw">return</span>(plot)
}
## Select only the relevant columns used in this analysis
expressionInfo =<span class="st"> </span>SPARC.data.QC %>%<span class="st"> </span><span class="kw">select</span>(geneID, sample, RPKM, Cq, time, type)
## Put RNA levels and protein levels in the same column
expressionInfoGathered =<span class="st"> </span>expressionInfo %>%<span class="st"> </span><span class="kw">gather</span>(RPKM, Cq , <span class="dt">key =</span> molecule, <span class="dt">value =</span> expression)
violinPlots =<span class="st"> </span><span class="kw">list</span>()
for(i in <span class="dv">1</span>:<span class="kw">length</span>(geneID_QC)){
<span class="co"># Extract information pergene</span>
expressionInfoGene =<span class="st"> </span>expressionInfoGathered[expressionInfoGathered$geneID ==<span class="st"> </span>geneID_QC[i],]
<span class="co"># Create violin plot for data</span>
plot =<span class="st"> </span><span class="kw">plotExpressionViolinPlot</span>(expressionInfoGene,geneID_QC[i] , moleculePalette)
<span class="co"># add the plots in the list</span>
violinPlots[i] =<span class="st"> </span><span class="kw">list</span>(plot)
}
<span class="co"># Plot example proteins to screen</span>
violinPlotExample =<span class="st"> </span>violinPlots[<span class="kw">which</span>(geneID_QC %in%<span class="st"> </span>proteinExamples)]
<span class="kw">grid.arrange</span>(<span class="dt">grobs =</span> violinPlotExample, <span class="dt">ncol=</span><span class="dv">2</span>)
<span class="co"># Plot all to file</span>
<span class="co">#filename = paste( "figures/violinPlots_multiple.pdf",sep = "/")</span>
<span class="co">#ggsave(filename, marrangeGrob(grobs = violinPlots, nrow=3, ncol=3) ,width = 4.51, height = 7.29)</span></code></pre></div>
<p><img src="lab_files/figure-html/violin%20plot-1.svg" width="672" style="display: block; margin: auto auto auto 0;" /></p>
</div>
<div id="produce-scatter-plot-for-both-data" class="section level3">
<h3><span class="header-section-number">2.3.2</span> Produce scatter plot for both data</h3>
<p>Here we can look at each single cell and see how they correlate.</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># extract relevant column from the data</span>
expressionInfo =<span class="st"> </span>SPARC.data %>%<span class="st"> </span><span class="kw">select</span>(geneID, sample, RPKM, Cq, time, type)
<span class="co">#only look at single cell samples</span>
expressionInfo.sc =<span class="st"> </span>expressionInfo[expressionInfo$type ==<span class="st"> "sc"</span>, ]
scatterPlots =<span class="st"> </span><span class="kw">list</span>()
for(i in <span class="dv">1</span>:<span class="kw">length</span>(geneID_QC)){
expressionInfo.sc.gene =<span class="st"> </span>expressionInfo.sc[expressionInfo.sc$geneID ==<span class="st"> </span>geneID_QC[i],]
scatterPlot =<span class="st"> </span><span class="kw">ggplot</span>(expressionInfo.sc.gene,
<span class="kw">aes</span>(RPKM,Cq, <span class="dt">color =</span> time)) +<span class="st"> </span>
<span class="st"> </span><span class="kw">geom_point</span>(<span class="dt">size =</span> <span class="fl">0.5</span>) +<span class="st"> </span>
<span class="st"> </span><span class="kw">theme</span>(<span class="dt">legend.position =</span> <span class="st">"none"</span>)+
<span class="st"> </span><span class="kw">ggtitle</span>(geneID_QC[i])+<span class="st"> </span>
<span class="st"> </span><span class="kw">scale_colour_manual</span>(<span class="dt">values=</span>timePalette)+<span class="st"> </span>
<span class="st"> </span><span class="kw">theme</span>(<span class="dt">axis.text.x =</span> <span class="kw">element_text</span>(<span class="dt">size =</span> <span class="dv">7</span>, <span class="dt">angle =</span> <span class="dv">90</span>, <span class="dt">hjust =</span> <span class="dv">1</span>, <span class="dt">vjust =</span> .<span class="dv">5</span>),
<span class="dt">axis.text.y =</span> <span class="kw">element_text</span>(<span class="dt">size =</span> <span class="dv">7</span>, <span class="dt">angle =</span> <span class="dv">0</span>),
<span class="dt">axis.title.x =</span> <span class="kw">element_text</span>(<span class="dt">size =</span> <span class="dv">8</span>),
<span class="dt">axis.title.y =</span> <span class="kw">element_text</span>(<span class="dt">size =</span> <span class="dv">8</span>),
<span class="dt">plot.title =</span> <span class="kw">element_text</span>(<span class="dt">size=</span><span class="dv">10</span>))
scatterPlots[[geneID_QC[i]]] =<span class="st"> </span>scatterPlot
}
<span class="co"># Plot example proteins to screen</span>
scatterPlotExample =<span class="st"> </span>scatterPlots[<span class="kw">which</span>(geneID_QC %in%<span class="st"> </span>proteinExamples)]
<span class="kw">grid.arrange</span>(<span class="dt">grobs =</span> scatterPlotExample, <span class="dt">ncol=</span><span class="dv">2</span>)
<span class="co"># Plot all to file</span>
<span class="co">#filename = paste( "figures/scatterPlot_multiple.pdf",sep = "/")</span>
<span class="co">#ggsave(filename, marrangeGrob(grobs = violinPlots, nrow=3, ncol=3) ,width = 4.51, height = 7.29)</span></code></pre></div>
<p><img src="lab_files/figure-html/scatter_plot%20for%20SPARC%20data-1.svg" width="672" style="display: block; margin: auto auto auto 0;" /></p>
<p>As you can see there is little correlation between the RNA levels and the protein levels.</p>
<p>But still there are some separation between the different times. Maybe a better way to see if there is any correlation is to plot them over time.</p>
</div>
<div id="plot-the-expression-over-time." class="section level3">
<h3><span class="header-section-number">2.3.3</span> Plot the expression over time.</h3>
<p>In the data file we have determined the pseudo time of each cell. We now want to see if there is better correlation if we compare the data over time.</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># extract relevant column from the data</span>
expressionInfo =<span class="st"> </span>SPARC.data %>%<span class="st"> </span><span class="kw">select</span>(geneID, sample, RPKM, Cq,RNA_seurat_pseudoTime , type)
<span class="co">#only look at single cell samples</span>
expressionInfo.sc =<span class="st"> </span>expressionInfo[expressionInfo$type ==<span class="st"> "sc"</span>, ]
expressionInfo.sc.gathered =<span class="st"> </span>expressionInfo.sc %>%<span class="st"> </span><span class="kw">gather</span>(RPKM, Cq , <span class="dt">key =</span> molecule, <span class="dt">value =</span> expression)
scatterPlots =<span class="st"> </span><span class="kw">list</span>()
for(i in <span class="dv">1</span>:<span class="kw">length</span>(geneID_QC)){
expressionInfo.sc.gathered.gene =<span class="st"> </span>expressionInfo.sc.gathered[expressionInfo.sc$geneID ==<span class="st"> </span>geneID_QC[i],]
scatterPlot =<span class="st"> </span><span class="kw">ggplot</span>(expressionInfo.sc.gathered.gene,
<span class="kw">aes</span>(RNA_seurat_pseudoTime,expression, <span class="dt">color =</span> molecule)) +<span class="st"> </span>
<span class="st"> </span><span class="kw">geom_point</span>(<span class="dt">size =</span> <span class="fl">0.5</span>) +<span class="st"> </span>
<span class="st"> </span><span class="kw">theme</span>(<span class="dt">legend.position =</span> <span class="st">"none"</span>)+
<span class="st"> </span><span class="kw">geom_smooth</span>()+<span class="st"> </span>
<span class="st"> </span><span class="kw">ggtitle</span>(geneID_QC[i])+<span class="st"> </span>
<span class="st"> </span><span class="kw">scale_colour_manual</span>(<span class="dt">values=</span>moleculePalette)+<span class="st"> </span>
<span class="st"> </span><span class="kw">theme</span>(<span class="dt">axis.text.x =</span> <span class="kw">element_text</span>(<span class="dt">size =</span> <span class="dv">7</span>, <span class="dt">angle =</span> <span class="dv">90</span>, <span class="dt">hjust =</span> <span class="dv">1</span>, <span class="dt">vjust =</span> .<span class="dv">5</span>),
<span class="dt">axis.text.y =</span> <span class="kw">element_text</span>(<span class="dt">size =</span> <span class="dv">7</span>, <span class="dt">angle =</span> <span class="dv">0</span>),
<span class="dt">axis.title.x =</span> <span class="kw">element_text</span>(<span class="dt">size =</span> <span class="dv">8</span>),
<span class="dt">axis.title.y =</span> <span class="kw">element_text</span>(<span class="dt">size =</span> <span class="dv">8</span>),
<span class="dt">plot.title =</span> <span class="kw">element_text</span>(<span class="dt">size=</span><span class="dv">10</span>))
scatterPlots[[geneID_QC[i]]] =<span class="st"> </span>scatterPlot
}
<span class="co"># Plot example proteins to screen</span>
scatterPlotExample =<span class="st"> </span>scatterPlots[<span class="kw">which</span>(geneID_QC %in%<span class="st"> </span>proteinExamples)]
<span class="kw">grid.arrange</span>(<span class="dt">grobs =</span> scatterPlotExample, <span class="dt">ncol=</span><span class="dv">2</span>)
<span class="co">#Plot all to file</span>
<span class="co">#filename = paste( "figures/scatterPlot_pseudoTime_multiple.pdf",sep = "/")</span>
<span class="co">#ggsave(filename, marrangeGrob(grobs = violinPlots, nrow=3, ncol=3) ,width = 4.51, height = 7.29)</span></code></pre></div>
<p><img src="lab_files/figure-html/scatterplot%20over%20time%20for%20SPARC%20data-1.svg" width="672" style="display: block; margin: auto auto auto 0;" /></p>
<p>As you can see the expression, independent for RNA and protein, change in the same way. One exception is TP53 that is known to have a post transcriptional regulation during the development and is also seen in the data.</p>
</div>
</div>
<div id="create-a-pseudotime-using-the-protein" class="section level2">
<h2><span class="header-section-number">2.4</span> Create a pseudotime using the protein</h2>
<p>In this analysis we will analyse if protein data also represent the differentiation of cells over time.</p>
<p>To do that we will reduce the dimensions of the proteins space using tSNE and PCA and then use the reduced spaces with a linear pseudo-time program SCORPIUS to determine a pseudo time for each cell.</p>
<div id="data-structure-handling" class="section level3">
<h3><span class="header-section-number">2.4.1</span> Data structure handling</h3>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">## keep only single cells and only protein expression information
SPARC.data.sc.protein =<span class="st"> </span>SPARC.data.QC %>%
<span class="st"> </span><span class="kw">filter</span>(type ==<span class="st">"sc"</span>) %>%<span class="st"> </span><span class="co">#only single cell</span>
<span class="st"> </span><span class="kw">select</span>(geneID, sample, Cq) %>%<span class="st"> </span><span class="co"># select columns that are important</span>
<span class="st"> </span><span class="kw">spread</span>(<span class="dt">key =</span> sample, <span class="dt">value =</span> Cq) <span class="co"># spread the data to a matrix</span>
<span class="co">#add rownames </span>
<span class="kw">rownames</span>(SPARC.data.sc.protein) =<span class="st"> </span>SPARC.data.sc.protein$geneID
<span class="co"># remove geneID column and transpose data</span>
SPARC.data.sc.protein.matrix =<span class="st"> </span><span class="kw">t</span>(SPARC.data.sc.protein %>%<span class="st"> </span><span class="kw">select</span>(-geneID))
<span class="co"># Normalise protein data so that each cell has the same protein level. </span>
SPARC.data.sc.protein.matrix.normalise =<span class="st"> </span>SPARC.data.sc.protein.matrix/<span class="kw">rowSums</span>(SPARC.data.sc.protein.matrix)</code></pre></div>
</div>
<div id="dimensionality-reductions-using-pca" class="section level3">
<h3><span class="header-section-number">2.4.2</span> Dimensionality reductions using PCA</h3>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">res.pca <-<span class="st"> </span><span class="kw">prcomp</span>(SPARC.data.sc.protein.matrix.normalise, <span class="dt">scale =</span> <span class="ot">TRUE</span>)
##fviz_eig(res.pca)
PCAresults =<span class="st"> </span><span class="kw">as.data.frame</span>(res.pca$x)[<span class="dv">1</span>:<span class="dv">3</span>]
<span class="kw">colnames</span>(PCAresults) =<span class="st"> </span><span class="kw">c</span>(<span class="st">"PCA_protein1"</span>,<span class="st">"PCA_protein2"</span>,<span class="st">"PCA_protein3"</span> )
PCAresults$sample =<span class="st"> </span><span class="kw">rownames</span>(PCAresults)
sampleInfo =<span class="st"> </span><span class="kw">inner_join</span>(sampleInfo,PCAresults )
<span class="kw">ggplot</span>(sampleInfo, <span class="kw">aes</span>(<span class="dt">x =</span> PCA_protein1, <span class="dt">y =</span> PCA_protein2, <span class="dt">color =</span> time))+<span class="st"> </span>
<span class="st"> </span><span class="kw">geom_point</span>()+
<span class="st"> </span><span class="kw">scale_colour_manual</span>(<span class="dt">values=</span>timePalette)</code></pre></div>
<p><img src="lab_files/figure-html/Dimensionality%20reductions%20using%20PCA-1.svg" width="672" style="display: block; margin: auto auto auto 0;" /> ### Pseudo time analysis on PCA vectors using SCORPIUS</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">PCASpace =<span class="st"> </span>sampleInfo %>%<span class="st"> </span><span class="kw">select</span>(PCA_protein1,PCA_protein2,PCA_protein3)
trajPCA <-<span class="st"> </span><span class="kw">infer_trajectory</span>(PCASpace)
<span class="kw">draw_trajectory_plot</span>(PCASpace, <span class="dt">progression_group =</span> sampleInfo$time, <span class="dt">path =</span> trajPCA$path )+<span class="st"> </span><span class="kw">scale_color_manual</span>(<span class="dt">values =</span> timePalette)
<span class="co"># add predicted pseudo time to sampleInfo. </span>
sampleInfo[<span class="st">"pseudoTimeProteinPCA"</span>] =<span class="st"> </span>trajPCA$time*<span class="dv">48</span></code></pre></div>
<p><img src="lab_files/figure-html/pseudo%20time%20analysis%20on%20PCA%20using%20SCORPIUS-1.svg" width="672" style="display: block; margin: auto auto auto 0;" /></p>
</div>
<div id="dimensionality-reductions-using-tsne" class="section level3">
<h3><span class="header-section-number">2.4.3</span> Dimensionality reductions using tSNE</h3>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">set.seed</span>(<span class="dv">1201</span>)
tSNE_protein =<span class="kw">Rtsne</span>(SPARC.data.sc.protein.matrix.normalise,<span class="dt">dims=</span><span class="dv">3</span>,<span class="dt">theta =</span> <span class="fl">0.0</span> , <span class="dt">initial_dims =</span> <span class="dv">150</span> ,<span class="dt">perplexity=</span><span class="dv">30</span>, <span class="dt">PCA =</span> <span class="ot">FALSE</span>)
tsneProjection =<span class="st"> </span><span class="kw">as.data.frame</span>(tSNE_protein$Y)
<span class="kw">colnames</span>(tsneProjection) =<span class="st"> </span><span class="kw">c</span>(<span class="st">"tSNE_protein1"</span>,<span class="st">"tSNE_protein2"</span>,<span class="st">"tSNE_protein3"</span> )
tsneProjection$sample =<span class="st"> </span><span class="kw">rownames</span>(SPARC.data.sc.protein.matrix)
sampleInfo =<span class="st"> </span><span class="kw">inner_join</span>(sampleInfo, tsneProjection)
<span class="co">#sampleInfo2 = inner_join(sampleInfo1,PCAProjection2)</span>
<span class="kw">ggplot</span>(sampleInfo, <span class="kw">aes</span>(<span class="dt">x =</span> tSNE_protein1, <span class="dt">y =</span> tSNE_protein2, <span class="dt">color =</span> time))+<span class="st"> </span>
<span class="st"> </span><span class="kw">geom_point</span>()+
<span class="st"> </span><span class="kw">scale_colour_manual</span>(<span class="dt">values=</span>timePalette)</code></pre></div>
<p><img src="lab_files/figure-html/Dimensionality%20reductions%20using%20tSNE-1.svg" width="672" style="display: block; margin: auto auto auto 0;" /></p>
</div>
<div id="pseudo-time-analysis-on-tsne-vectors-using-scorpius" class="section level3">
<h3><span class="header-section-number">2.4.4</span> Pseudo time analysis on tSNE vectors using SCORPIUS</h3>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">library</span>(SCORPIUS)
tsneSpace =<span class="st"> </span>sampleInfo %>%<span class="st"> </span><span class="kw">select</span>(tSNE_protein1,tSNE_protein2,tSNE_protein3)
trajtSNE <-<span class="st"> </span><span class="kw">infer_trajectory</span>(tsneSpace)
<span class="kw">draw_trajectory_plot</span>(tsneSpace, <span class="dt">progression_group =</span> sampleInfo$time,
<span class="dt">path =</span> trajtSNE$path )+<span class="st"> </span>
<span class="st"> </span><span class="kw">scale_color_manual</span>(<span class="dt">values =</span> timePalette)
sampleInfo[ <span class="st">"pseudoTimeProtein_tSNE"</span>] =<span class="st"> </span>trajtSNE$time*<span class="dv">48</span></code></pre></div>
<p><img src="lab_files/figure-html/pseudo%20time%20analysis%20on%20tSNE%20using%20SCORPIUS-1.svg" width="672" style="display: block; margin: auto auto auto 0;" /></p>
</div>
<div id="pseudo-time-comparison-between-rna-and-protein-and-methods" class="section level3">
<h3><span class="header-section-number">2.4.5</span> Pseudo time comparison between RNA and protein and methods</h3>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="kw">library</span>(knitr)
<span class="kw">ggplot</span>(sampleInfo, <span class="kw">aes</span>(<span class="dt">x =</span> pseudoTimeProteinPCA, <span class="dt">y =</span> pseudoTimeProtein_tSNE, <span class="dt">color =</span> RNA_seurat_pseudoTime))+<span class="st"> </span><span class="kw">geom_point</span>()
PseudoTime =<span class="st"> </span>sampleInfo %>%<span class="st"> </span><span class="kw">select</span> (pseudoTimeProteinPCA,pseudoTimeProtein_tSNE, RNA_seurat_pseudoTime)</code></pre></div>
<p><img src="lab_files/figure-html/Comparing%20pseudo%20time%20results-1.svg" width="672" style="display: block; margin: auto auto auto 0;" /></p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># Pearson correlation score between the three different samples</span>
<span class="kw">kable</span>(<span class="kw">cor</span>(PseudoTime, <span class="dt">method =</span> <span class="st">"pearson"</span>))</code></pre></div>
<table>
<thead>
<tr class="header">
<th></th>
<th align="right">pseudoTimeProteinPCA</th>
<th align="right">pseudoTimeProtein_tSNE</th>
<th align="right">RNA_seurat_pseudoTime</th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td>pseudoTimeProteinPCA</td>
<td align="right">1.0000000</td>
<td align="right">0.9111081</td>
<td align="right">0.8220609</td>
</tr>
<tr class="even">
<td>pseudoTimeProtein_tSNE</td>
<td align="right">0.9111081</td>
<td align="right">1.0000000</td>
<td align="right">0.8606640</td>
</tr>
<tr class="odd">
<td>RNA_seurat_pseudoTime</td>
<td align="right">0.8220609</td>