diff --git a/eggstrain/src/execution/operators/sort.rs b/eggstrain/src/execution/operators/sort.rs index 27ffacf..4e7edac 100644 --- a/eggstrain/src/execution/operators/sort.rs +++ b/eggstrain/src/execution/operators/sort.rs @@ -30,17 +30,21 @@ impl Sort { } } - fn sort_in_mem(&self, rb: RecordBatch) -> Result { - assert_eq!(rb.schema(), self.input_schema); + fn sort_in_mem( + rb: RecordBatch, + limit_size: Option, + sort_expr: Vec, + ) -> Result { + // assert_eq!(rb.schema(), self.input_schema); - let expressions = self.sort_expr.clone(); + let expressions = sort_expr.clone(); let sort_columns = expressions .iter() .map(|expr| expr.evaluate_to_sort_column(&rb)) .collect::>>()?; - let indices = lexsort_to_indices(&sort_columns, self.limit_size)?; + let indices = lexsort_to_indices(&sort_columns, limit_size)?; let columns = rb .columns() @@ -90,9 +94,11 @@ impl UnaryOperator for Sort { } let merged_batch = concat_batches(&self.input_schema, &batches); - match merged_batch { + let limit_size = self.limit_size; + let sort_expr = self.sort_expr.clone(); + rayon::spawn(move || match merged_batch { Ok(merged_batch) => { - let sorted_batch = self.sort_in_mem(merged_batch).unwrap(); + let sorted_batch = Sort::sort_in_mem(merged_batch, limit_size, sort_expr).unwrap(); let mut current = 0; let total_rows = sorted_batch.num_rows(); while current + BATCH_SIZE < total_rows { @@ -104,11 +110,8 @@ impl UnaryOperator for Sort { let batch_to_send = sorted_batch.slice(current, total_rows - current); tx.send(batch_to_send) .expect("Unable to send the last sorted batch"); - - // TODO: do I have to call drop here manually or will rust take care of it? - // drop(sorted_batch); } Err(_) => todo!("Could not concat the batches for sorting"), - } + }); } } diff --git a/eggstrain/src/main.rs b/eggstrain/src/main.rs index df04a2f..e97ae0c 100644 --- a/eggstrain/src/main.rs +++ b/eggstrain/src/main.rs @@ -14,6 +14,7 @@ async fn main() -> Result<()> { // Run our execution engine on the physical plan let df_physical_plan = sql.clone().create_physical_plan().await?; + let df_physical_plan = df_physical_plan.children()[0].clone(); let results = run(df_physical_plan).await; results.into_iter().for_each(|batch| { diff --git a/proposal/images/hashjoin.svg b/proposal/images/hashjoin.svg new file mode 100644 index 0000000..5f95a8e --- /dev/null +++ b/proposal/images/hashjoin.svg @@ -0,0 +1,4 @@ + + + +
Table Scan
Table Scan
Tx
Tx
Move
Hash Build
Hash Probe
Rayon Thread
Async Land
Sync Land 
Rx
Rx
Tx
Rx
Tx
\ No newline at end of file diff --git a/proposal/presentation.html b/proposal/presentation.html index ca6c96e..c778008 100644 --- a/proposal/presentation.html +++ b/proposal/presentation.html @@ -1,4 +1,4 @@ -
-

Execution Engine: KCS

+ /* content:""; */display:table}div#\:\$p>svg>foreignObject>section:after{clear:both}div#\:\$p>svg>foreignObject>section>:first-child{margin-top:0!important}div#\:\$p>svg>foreignObject>section>:last-child{margin-bottom:0!important}div#\:\$p>svg>foreignObject>section a:not([href]){color:inherit;text-decoration:none}div#\:\$p>svg>foreignObject>section .absent{color:var(--color-danger-fg)}div#\:\$p>svg>foreignObject>section .anchor{float:left;line-height:1;margin-left:-20px;padding-right:4px}div#\:\$p>svg>foreignObject>section .anchor:focus{outline:none}div#\:\$p>svg>foreignObject>section :is(pre,marp-pre),div#\:\$p>svg>foreignObject>section blockquote,div#\:\$p>svg>foreignObject>section details,div#\:\$p>svg>foreignObject>section dl,div#\:\$p>svg>foreignObject>section ol,div#\:\$p>svg>foreignObject>section p,div#\:\$p>svg>foreignObject>section table,div#\:\$p>svg>foreignObject>section ul{margin-bottom:16px;margin-top:0}div#\:\$p>svg>foreignObject>section blockquote>:first-child{margin-top:0}div#\:\$p>svg>foreignObject>section blockquote>:last-child{margin-bottom:0}div#\:\$p>svg>foreignObject>section :is(h1,marp-h1) .octicon-link,div#\:\$p>svg>foreignObject>section :is(h2,marp-h2) .octicon-link,div#\:\$p>svg>foreignObject>section :is(h3,marp-h3) .octicon-link,div#\:\$p>svg>foreignObject>section :is(h4,marp-h4) .octicon-link,div#\:\$p>svg>foreignObject>section :is(h5,marp-h5) .octicon-link,div#\:\$p>svg>foreignObject>section :is(h6,marp-h6) .octicon-link{color:var(--color-fg-default);vertical-align:middle;visibility:hidden}div#\:\$p>svg>foreignObject>section :is(h1,marp-h1):hover .anchor,div#\:\$p>svg>foreignObject>section :is(h2,marp-h2):hover .anchor,div#\:\$p>svg>foreignObject>section :is(h3,marp-h3):hover .anchor,div#\:\$p>svg>foreignObject>section :is(h4,marp-h4):hover .anchor,div#\:\$p>svg>foreignObject>section :is(h5,marp-h5):hover .anchor,div#\:\$p>svg>foreignObject>section :is(h6,marp-h6):hover .anchor{text-decoration:none}div#\:\$p>svg>foreignObject>section :is(h1,marp-h1):hover .anchor .octicon-link,div#\:\$p>svg>foreignObject>section :is(h2,marp-h2):hover .anchor .octicon-link,div#\:\$p>svg>foreignObject>section :is(h3,marp-h3):hover .anchor .octicon-link,div#\:\$p>svg>foreignObject>section :is(h4,marp-h4):hover .anchor .octicon-link,div#\:\$p>svg>foreignObject>section :is(h5,marp-h5):hover .anchor .octicon-link,div#\:\$p>svg>foreignObject>section :is(h6,marp-h6):hover .anchor .octicon-link{visibility:visible}div#\:\$p>svg>foreignObject>section :is(h1,marp-h1) code,div#\:\$p>svg>foreignObject>section :is(h1,marp-h1) tt,div#\:\$p>svg>foreignObject>section :is(h2,marp-h2) code,div#\:\$p>svg>foreignObject>section :is(h2,marp-h2) tt,div#\:\$p>svg>foreignObject>section :is(h3,marp-h3) code,div#\:\$p>svg>foreignObject>section :is(h3,marp-h3) tt,div#\:\$p>svg>foreignObject>section :is(h4,marp-h4) code,div#\:\$p>svg>foreignObject>section :is(h4,marp-h4) tt,div#\:\$p>svg>foreignObject>section :is(h5,marp-h5) code,div#\:\$p>svg>foreignObject>section :is(h5,marp-h5) tt,div#\:\$p>svg>foreignObject>section :is(h6,marp-h6) code,div#\:\$p>svg>foreignObject>section :is(h6,marp-h6) tt{font-size:inherit;padding:0 .2em}div#\:\$p>svg>foreignObject>section summary :is(h1,marp-h1),div#\:\$p>svg>foreignObject>section summary :is(h2,marp-h2),div#\:\$p>svg>foreignObject>section summary :is(h3,marp-h3),div#\:\$p>svg>foreignObject>section summary :is(h4,marp-h4),div#\:\$p>svg>foreignObject>section summary :is(h5,marp-h5),div#\:\$p>svg>foreignObject>section summary :is(h6,marp-h6){display:inline-block}div#\:\$p>svg>foreignObject>section summary :is(h1,marp-h1) .anchor,div#\:\$p>svg>foreignObject>section summary :is(h2,marp-h2) .anchor,div#\:\$p>svg>foreignObject>section summary :is(h3,marp-h3) .anchor,div#\:\$p>svg>foreignObject>section summary :is(h4,marp-h4) .anchor,div#\:\$p>svg>foreignObject>section summary :is(h5,marp-h5) .anchor,div#\:\$p>svg>foreignObject>section summary :is(h6,marp-h6) .anchor{margin-left:-40px}div#\:\$p>svg>foreignObject>section summary :is(h1,marp-h1),div#\:\$p>svg>foreignObject>section summary :is(h2,marp-h2){border-bottom:0;padding-bottom:0}div#\:\$p>svg>foreignObject>section ol.no-list,div#\:\$p>svg>foreignObject>section ul.no-list{list-style-type:none;padding:0}div#\:\$p>svg>foreignObject>section ol[type="a s"]{list-style-type:lower-alpha}div#\:\$p>svg>foreignObject>section ol[type="A s"]{list-style-type:upper-alpha}div#\:\$p>svg>foreignObject>section ol[type="i s"]{list-style-type:lower-roman}div#\:\$p>svg>foreignObject>section ol[type="I s"]{list-style-type:upper-roman}div#\:\$p>svg>foreignObject>section div>ol:not([type]),div#\:\$p>svg>foreignObject>section ol[type="1"]{list-style-type:decimal}div#\:\$p>svg>foreignObject>section ol ol,div#\:\$p>svg>foreignObject>section ol ul,div#\:\$p>svg>foreignObject>section ul ol,div#\:\$p>svg>foreignObject>section ul ul{margin-bottom:0;margin-top:0}div#\:\$p>svg>foreignObject>section li>p{margin-top:16px}div#\:\$p>svg>foreignObject>section li+li{margin-top:.25em}div#\:\$p>svg>foreignObject>section dl{padding:0}div#\:\$p>svg>foreignObject>section dl dt{font-size:1em;font-style:italic;font-weight:var(--base-text-weight-semibold,600);margin-top:16px;padding:0}div#\:\$p>svg>foreignObject>section dl dd{margin-bottom:16px;padding:0 16px}div#\:\$p>svg>foreignObject>section table th{font-weight:var(--base-text-weight-semibold,600)}div#\:\$p>svg>foreignObject>section table td,div#\:\$p>svg>foreignObject>section table th{border:1px solid var(--color-border-default);padding:6px 13px}div#\:\$p>svg>foreignObject>section table td>:last-child{margin-bottom:0}div#\:\$p>svg>foreignObject>section table tr{background-color:var(--color-canvas-default);border-top:1px solid var(--color-border-muted)}div#\:\$p>svg>foreignObject>section table tr:nth-child(2n){background-color:var(--color-canvas-subtle)}div#\:\$p>svg>foreignObject>section table img{background-color:transparent}div#\:\$p>svg>foreignObject>section img[align=right]{padding-left:20px}div#\:\$p>svg>foreignObject>section img[align=left]{padding-right:20px}div#\:\$p>svg>foreignObject>section .emoji{background-color:transparent;max-width:none;vertical-align:text-top}div#\:\$p>svg>foreignObject>section :is(span,marp-span).frame,div#\:\$p>svg>foreignObject>section :is(span,marp-span).frame>:is(span,marp-span){display:block;overflow:hidden}div#\:\$p>svg>foreignObject>section :is(span,marp-span).frame>:is(span,marp-span){border:1px solid var(--color-border-default);float:left;margin:13px 0 0;padding:7px;width:auto}div#\:\$p>svg>foreignObject>section :is(span,marp-span).frame :is(span,marp-span) img{display:block;float:left}div#\:\$p>svg>foreignObject>section :is(span,marp-span).frame :is(span,marp-span) :is(span,marp-span){clear:both;color:var(--color-fg-default);display:block;padding:5px 0 0}div#\:\$p>svg>foreignObject>section :is(span,marp-span).align-center{clear:both;display:block;overflow:hidden}div#\:\$p>svg>foreignObject>section :is(span,marp-span).align-center>:is(span,marp-span){display:block;margin:13px auto 0;overflow:hidden;text-align:center}div#\:\$p>svg>foreignObject>section :is(span,marp-span).align-center :is(span,marp-span) img{margin:0 auto;text-align:center}div#\:\$p>svg>foreignObject>section :is(span,marp-span).align-right{clear:both;display:block;overflow:hidden}div#\:\$p>svg>foreignObject>section :is(span,marp-span).align-right>:is(span,marp-span){display:block;margin:13px 0 0;overflow:hidden;text-align:right}div#\:\$p>svg>foreignObject>section :is(span,marp-span).align-right :is(span,marp-span) img{margin:0;text-align:right}div#\:\$p>svg>foreignObject>section :is(span,marp-span).float-left{display:block;float:left;margin-right:13px;overflow:hidden}div#\:\$p>svg>foreignObject>section :is(span,marp-span).float-left :is(span,marp-span){margin:13px 0 0}div#\:\$p>svg>foreignObject>section :is(span,marp-span).float-right{display:block;float:right;margin-left:13px;overflow:hidden}div#\:\$p>svg>foreignObject>section :is(span,marp-span).float-right>:is(span,marp-span){display:block;margin:13px auto 0;overflow:hidden;text-align:right}div#\:\$p>svg>foreignObject>section code,div#\:\$p>svg>foreignObject>section tt{background-color:var(--color-neutral-muted);border-radius:6px;font-size:85%;margin:0;padding:.2em .4em;white-space:break-spaces}div#\:\$p>svg>foreignObject>section code br,div#\:\$p>svg>foreignObject>section tt br{display:none}div#\:\$p>svg>foreignObject>section del code{text-decoration:inherit}div#\:\$p>svg>foreignObject>section samp{font-size:85%}div#\:\$p>svg>foreignObject>section :is(pre,marp-pre) code{font-size:100%}div#\:\$p>svg>foreignObject>section :is(pre,marp-pre)>code{background:transparent;border:0;margin:0;padding:0;white-space:pre;word-break:normal}div#\:\$p>svg>foreignObject>section .highlight{margin-bottom:16px}div#\:\$p>svg>foreignObject>section .highlight :is(pre,marp-pre){margin-bottom:0;word-break:normal}div#\:\$p>svg>foreignObject>section :is(pre,marp-pre){background-color:var(--color-canvas-subtle);border-radius:6px;color:var(--color-fg-default);font-size:85%;line-height:1.45;overflow:auto;padding:16px}div#\:\$p>svg>foreignObject>section :is(pre,marp-pre) code,div#\:\$p>svg>foreignObject>section :is(pre,marp-pre) tt{word-wrap:normal;background-color:transparent;border:0;display:inline;line-height:inherit;margin:0;max-width:auto;overflow:visible;padding:0}div#\:\$p>svg>foreignObject>section .csv-data td,div#\:\$p>svg>foreignObject>section .csv-data th{font-size:12px;line-height:1;overflow:hidden;padding:5px;text-align:left;white-space:nowrap}div#\:\$p>svg>foreignObject>section .csv-data .blob-num{background:var(--color-canvas-default);border:0;padding:10px 8px 9px;text-align:right}div#\:\$p>svg>foreignObject>section .csv-data tr{border-top:0}div#\:\$p>svg>foreignObject>section .csv-data th{background:var(--color-canvas-subtle);border-top:0;font-weight:var(--base-text-weight-semibold,600)}div#\:\$p>svg>foreignObject>section [data-footnote-ref]:before{content:"["}div#\:\$p>svg>foreignObject>section [data-footnote-ref]:after{content:"]"}div#\:\$p>svg>foreignObject>section .footnotes{border-top:1px solid var(--color-border-default);color:var(--color-fg-muted);font-size:12px}div#\:\$p>svg>foreignObject>section div#\:\$p>svg>foreignObject>section section.footnotes{--marpit-root-font-size:12px}div#\:\$p>svg>foreignObject>section .footnotes ol{padding-left:16px}div#\:\$p>svg>foreignObject>section .footnotes ol ul{display:inline-block;margin-top:16px;padding-left:16px}div#\:\$p>svg>foreignObject>section .footnotes li{position:relative}div#\:\$p>svg>foreignObject>section .footnotes li:target:before{border:2px solid var(--color-accent-emphasis);border-radius:6px;bottom:-8px;content:"";left:-24px;pointer-events:none;position:absolute;right:-8px;top:-8px}div#\:\$p>svg>foreignObject>section .footnotes li:target{color:var(--color-fg-default)}div#\:\$p>svg>foreignObject>section .footnotes .data-footnote-backref g-emoji{font-family:monospace}div#\:\$p>svg>foreignObject>section .pl-c{color:var(--color-prettylights-syntax-comment)}div#\:\$p>svg>foreignObject>section .pl-c1,div#\:\$p>svg>foreignObject>section .pl-s .pl-v{color:var(--color-prettylights-syntax-constant)}div#\:\$p>svg>foreignObject>section .pl-e,div#\:\$p>svg>foreignObject>section .pl-en{color:var(--color-prettylights-syntax-entity)}div#\:\$p>svg>foreignObject>section .pl-s .pl-s1,div#\:\$p>svg>foreignObject>section .pl-smi{color:var(--color-prettylights-syntax-storage-modifier-import)}div#\:\$p>svg>foreignObject>section .pl-ent{color:var(--color-prettylights-syntax-entity-tag)}div#\:\$p>svg>foreignObject>section .pl-k{color:var(--color-prettylights-syntax-keyword)}div#\:\$p>svg>foreignObject>section .pl-pds,div#\:\$p>svg>foreignObject>section .pl-s,div#\:\$p>svg>foreignObject>section .pl-s .pl-pse .pl-s1,div#\:\$p>svg>foreignObject>section .pl-sr,div#\:\$p>svg>foreignObject>section .pl-sr .pl-cce,div#\:\$p>svg>foreignObject>section .pl-sr .pl-sra,div#\:\$p>svg>foreignObject>section .pl-sr .pl-sre{color:var(--color-prettylights-syntax-string)}div#\:\$p>svg>foreignObject>section .pl-smw,div#\:\$p>svg>foreignObject>section .pl-v{color:var(--color-prettylights-syntax-variable)}div#\:\$p>svg>foreignObject>section .pl-bu{color:var(--color-prettylights-syntax-brackethighlighter-unmatched)}div#\:\$p>svg>foreignObject>section .pl-ii{background-color:var(--color-prettylights-syntax-invalid-illegal-bg);color:var(--color-prettylights-syntax-invalid-illegal-text)}div#\:\$p>svg>foreignObject>section .pl-c2{background-color:var(--color-prettylights-syntax-carriage-return-bg);color:var(--color-prettylights-syntax-carriage-return-text)}div#\:\$p>svg>foreignObject>section .pl-sr .pl-cce{color:var(--color-prettylights-syntax-string-regexp);font-weight:700}div#\:\$p>svg>foreignObject>section .pl-ml{color:var(--color-prettylights-syntax-markup-list)}div#\:\$p>svg>foreignObject>section .pl-mh,div#\:\$p>svg>foreignObject>section .pl-mh .pl-en,div#\:\$p>svg>foreignObject>section .pl-ms{color:var(--color-prettylights-syntax-markup-heading);font-weight:700}div#\:\$p>svg>foreignObject>section .pl-mi{color:var(--color-prettylights-syntax-markup-italic);font-style:italic}div#\:\$p>svg>foreignObject>section .pl-mb{color:var(--color-prettylights-syntax-markup-bold);font-weight:700}div#\:\$p>svg>foreignObject>section .pl-md{background-color:var(--color-prettylights-syntax-markup-deleted-bg);color:var(--color-prettylights-syntax-markup-deleted-text)}div#\:\$p>svg>foreignObject>section .pl-mi1{background-color:var(--color-prettylights-syntax-markup-inserted-bg);color:var(--color-prettylights-syntax-markup-inserted-text)}div#\:\$p>svg>foreignObject>section .pl-mc{background-color:var(--color-prettylights-syntax-markup-changed-bg);color:var(--color-prettylights-syntax-markup-changed-text)}div#\:\$p>svg>foreignObject>section .pl-mi2{background-color:var(--color-prettylights-syntax-markup-ignored-bg);color:var(--color-prettylights-syntax-markup-ignored-text)}div#\:\$p>svg>foreignObject>section .pl-mdr{color:var(--color-prettylights-syntax-meta-diff-range);font-weight:700}div#\:\$p>svg>foreignObject>section .pl-ba{color:var(--color-prettylights-syntax-brackethighlighter-angle)}div#\:\$p>svg>foreignObject>section .pl-sg{color:var(--color-prettylights-syntax-sublimelinter-gutter-mark)}div#\:\$p>svg>foreignObject>section .pl-corl{color:var(--color-prettylights-syntax-constant-other-reference-link);text-decoration:underline}div#\:\$p>svg>foreignObject>section g-emoji{display:inline-block;font-family:Apple Color Emoji,Segoe UI Emoji,Segoe UI Symbol;font-size:1em;font-style:normal!important;font-weight:var(--base-text-weight-normal,400);line-height:1;min-width:1ch;vertical-align:-.075em}div#\:\$p>svg>foreignObject>section g-emoji img{height:1em;width:1em}div#\:\$p>svg>foreignObject>section .task-list-item{list-style-type:none}div#\:\$p>svg>foreignObject>section .task-list-item label{font-weight:var(--base-text-weight-normal,400)}div#\:\$p>svg>foreignObject>section .task-list-item.enabled label{cursor:pointer}div#\:\$p>svg>foreignObject>section .task-list-item+.task-list-item{margin-top:4px}div#\:\$p>svg>foreignObject>section .task-list-item .handle{display:none}div#\:\$p>svg>foreignObject>section .task-list-item-checkbox{margin:0 .2em .25em -1.4em;vertical-align:middle}div#\:\$p>svg>foreignObject>section .contains-task-list:dir(rtl) .task-list-item-checkbox{margin:0 -1.6em .25em .2em}div#\:\$p>svg>foreignObject>section .contains-task-list{position:relative}div#\:\$p>svg>foreignObject>section .contains-task-list:focus-within .task-list-item-convert-container,div#\:\$p>svg>foreignObject>section .contains-task-list:hover .task-list-item-convert-container{clip:auto;display:block;height:24px;overflow:visible;width:auto}div#\:\$p>svg>foreignObject>section ::-webkit-calendar-picker-indicator{filter:invert(50%)}div#\:\$p>svg>foreignObject>section :is(h1,marp-h1){color:var(--h1-color);font-size:1.6em}div#\:\$p>svg>foreignObject>section :is(h1,marp-h1),div#\:\$p>svg>foreignObject>section :is(h2,marp-h2){border-bottom:none}div#\:\$p>svg>foreignObject>section :is(h2,marp-h2){font-size:1.3em}div#\:\$p>svg>foreignObject>section :is(h3,marp-h3){font-size:1.1em}div#\:\$p>svg>foreignObject>section :is(h4,marp-h4){font-size:1.05em}div#\:\$p>svg>foreignObject>section :is(h5,marp-h5){font-size:1em}div#\:\$p>svg>foreignObject>section :is(h6,marp-h6){font-size:.9em}div#\:\$p>svg>foreignObject>section :is(h1,marp-h1) strong,div#\:\$p>svg>foreignObject>section :is(h2,marp-h2) strong,div#\:\$p>svg>foreignObject>section :is(h3,marp-h3) strong,div#\:\$p>svg>foreignObject>section :is(h4,marp-h4) strong,div#\:\$p>svg>foreignObject>section :is(h5,marp-h5) strong,div#\:\$p>svg>foreignObject>section :is(h6,marp-h6) strong{color:var(--heading-strong-color);font-weight:inherit}div#\:\$p>svg>foreignObject>section :is(h1,marp-h1)::part(auto-scaling),div#\:\$p>svg>foreignObject>section :is(h2,marp-h2)::part(auto-scaling),div#\:\$p>svg>foreignObject>section :is(h3,marp-h3)::part(auto-scaling),div#\:\$p>svg>foreignObject>section :is(h4,marp-h4)::part(auto-scaling),div#\:\$p>svg>foreignObject>section :is(h5,marp-h5)::part(auto-scaling),div#\:\$p>svg>foreignObject>section :is(h6,marp-h6)::part(auto-scaling){max-height:563px}div#\:\$p>svg>foreignObject>section hr{height:0;padding-top:.25em}div#\:\$p>svg>foreignObject>section :is(pre,marp-pre){border:1px solid var(--color-border-default);line-height:1.15;overflow:visible}div#\:\$p>svg>foreignObject>section :is(pre,marp-pre)::part(auto-scaling){max-height:529px}div#\:\$p>svg>foreignObject>section :is(pre,marp-pre) :where(.hljs){color:var(--color-prettylights-syntax-storage-modifier-import)}div#\:\$p>svg>foreignObject>section :is(pre,marp-pre) :where(.hljs-doctag),div#\:\$p>svg>foreignObject>section :is(pre,marp-pre) :where(.hljs-keyword),div#\:\$p>svg>foreignObject>section :is(pre,marp-pre) :where(.hljs-meta .hljs-keyword),div#\:\$p>svg>foreignObject>section :is(pre,marp-pre) :where(.hljs-template-tag),div#\:\$p>svg>foreignObject>section :is(pre,marp-pre) :where(.hljs-template-variable),div#\:\$p>svg>foreignObject>section :is(pre,marp-pre) :where(.hljs-type),div#\:\$p>svg>foreignObject>section :is(pre,marp-pre) :where(.hljs-variable.language_){color:var(--color-prettylights-syntax-keyword)}div#\:\$p>svg>foreignObject>section :is(pre,marp-pre) :where(.hljs-title),div#\:\$p>svg>foreignObject>section :is(pre,marp-pre) :where(.hljs-title.class_),div#\:\$p>svg>foreignObject>section :is(pre,marp-pre) :where(.hljs-title.class_.inherited__),div#\:\$p>svg>foreignObject>section :is(pre,marp-pre) :where(.hljs-title.function_){color:var(--color-prettylights-syntax-entity)}div#\:\$p>svg>foreignObject>section :is(pre,marp-pre) :where(.hljs-attr),div#\:\$p>svg>foreignObject>section :is(pre,marp-pre) :where(.hljs-attribute),div#\:\$p>svg>foreignObject>section :is(pre,marp-pre) :where(.hljs-literal),div#\:\$p>svg>foreignObject>section :is(pre,marp-pre) :where(.hljs-meta),div#\:\$p>svg>foreignObject>section :is(pre,marp-pre) :where(.hljs-number),div#\:\$p>svg>foreignObject>section :is(pre,marp-pre) :where(.hljs-operator),div#\:\$p>svg>foreignObject>section :is(pre,marp-pre) :where(.hljs-selector-attr),div#\:\$p>svg>foreignObject>section :is(pre,marp-pre) :where(.hljs-selector-class),div#\:\$p>svg>foreignObject>section :is(pre,marp-pre) :where(.hljs-selector-id),div#\:\$p>svg>foreignObject>section :is(pre,marp-pre) :where(.hljs-variable){color:var(--color-prettylights-syntax-constant)}div#\:\$p>svg>foreignObject>section :is(pre,marp-pre) :where(.hljs-meta .hljs-string),div#\:\$p>svg>foreignObject>section :is(pre,marp-pre) :where(.hljs-regexp),div#\:\$p>svg>foreignObject>section :is(pre,marp-pre) :where(.hljs-string){color:var(--color-prettylights-syntax-string)}div#\:\$p>svg>foreignObject>section :is(pre,marp-pre) :where(.hljs-built_in),div#\:\$p>svg>foreignObject>section :is(pre,marp-pre) :where(.hljs-symbol){color:var(--color-prettylights-syntax-variable)}div#\:\$p>svg>foreignObject>section :is(pre,marp-pre) :where(.hljs-code),div#\:\$p>svg>foreignObject>section :is(pre,marp-pre) :where(.hljs-comment),div#\:\$p>svg>foreignObject>section :is(pre,marp-pre) :where(.hljs-formula){color:var(--color-prettylights-syntax-comment)}div#\:\$p>svg>foreignObject>section :is(pre,marp-pre) :where(.hljs-name),div#\:\$p>svg>foreignObject>section :is(pre,marp-pre) :where(.hljs-quote),div#\:\$p>svg>foreignObject>section :is(pre,marp-pre) :where(.hljs-selector-pseudo),div#\:\$p>svg>foreignObject>section :is(pre,marp-pre) :where(.hljs-selector-tag){color:var(--color-prettylights-syntax-entity-tag)}div#\:\$p>svg>foreignObject>section :is(pre,marp-pre) :where(.hljs-subst){color:var(--color-prettylights-syntax-storage-modifier-import)}div#\:\$p>svg>foreignObject>section :is(pre,marp-pre) :where(.hljs-section){color:var(--color-prettylights-syntax-markup-heading);font-weight:700}div#\:\$p>svg>foreignObject>section :is(pre,marp-pre) :where(.hljs-bullet){color:var(--color-prettylights-syntax-markup-list)}div#\:\$p>svg>foreignObject>section :is(pre,marp-pre) :where(.hljs-emphasis){color:var(--color-prettylights-syntax-markup-italic);font-style:italic}div#\:\$p>svg>foreignObject>section :is(pre,marp-pre) :where(.hljs-strong){color:var(--color-prettylights-syntax-markup-bold);font-weight:700}div#\:\$p>svg>foreignObject>section :is(pre,marp-pre) :where(.hljs-addition){background-color:var(--color-prettylights-syntax-markup-inserted-bg);color:var(--color-prettylights-syntax-markup-inserted-text)}div#\:\$p>svg>foreignObject>section :is(pre,marp-pre) :where(.hljs-deletion){background-color:var(--color-prettylights-syntax-markup-deleted-bg);color:var(--color-prettylights-syntax-markup-deleted-text)}div#\:\$p>svg>foreignObject>section footer,div#\:\$p>svg>foreignObject>section header{color:var(--header-footer-color);font-size:18px;left:30px;margin:0;position:absolute}div#\:\$p>svg>foreignObject>section header{top:21px}div#\:\$p>svg>foreignObject>section footer{bottom:21px}div#\:\$p>svg>foreignObject>section{--h1-color:#246;--header-footer-color:hsla(0,0%,40%,.75);--heading-strong-color:#48c;--paginate-color:#777;align-items:stretch;display:flex;flex-flow:column nowrap;font-size:29px;height:720px;justify-content:center;padding:78.5px;width:1280px}div#\:\$p>svg>foreignObject>section{--marpit-root-font-size:29px}div#\:\$p>svg>foreignObject>section:where(.invert){--h1-color:#cee7ff;--header-footer-color:hsla(0,0%,60%,.75);--heading-strong-color:#7bf;--paginate-color:#999}div#\:\$p>svg>foreignObject>section>:last-child,div#\:\$p>svg>foreignObject>section[data-footer]>:nth-last-child(2){margin-bottom:0}div#\:\$p>svg>foreignObject>section>:first-child,div#\:\$p>svg>foreignObject>section>header:first-child+*{margin-top:0}div#\:\$p>svg>foreignObject>section:after{bottom:21px;color:var(--paginate-color);font-size:24px;padding:0;position:absolute;right:30px}div#\:\$p>svg>foreignObject>section:after{--marpit-root-font-size:24px}div#\:\$p>svg>foreignObject>section[data-color] :is(h1,marp-h1),div#\:\$p>svg>foreignObject>section[data-color] :is(h2,marp-h2),div#\:\$p>svg>foreignObject>section[data-color] :is(h3,marp-h3),div#\:\$p>svg>foreignObject>section[data-color] :is(h4,marp-h4),div#\:\$p>svg>foreignObject>section[data-color] :is(h5,marp-h5),div#\:\$p>svg>foreignObject>section[data-color] :is(h6,marp-h6){color:currentcolor}div#\:\$p>svg>foreignObject>section[data-marpit-advanced-background=background]{columns:initial!important;display:block!important;padding:0!important}div#\:\$p>svg>foreignObject>section[data-marpit-advanced-background=background]:after,div#\:\$p>svg>foreignObject>section[data-marpit-advanced-background=background]:before,div#\:\$p>svg>foreignObject>section[data-marpit-advanced-background=content]:after,div#\:\$p>svg>foreignObject>section[data-marpit-advanced-background=content]:before{display:none!important}div#\:\$p>svg>foreignObject>section[data-marpit-advanced-background=background]>div[data-marpit-advanced-background-container]{all:initial;display:flex;flex-direction:row;height:100%;overflow:hidden;width:100%}div#\:\$p>svg>foreignObject>section[data-marpit-advanced-background=background]>div[data-marpit-advanced-background-container][data-marpit-advanced-background-direction=vertical]{flex-direction:column}div#\:\$p>svg>foreignObject>section[data-marpit-advanced-background=background][data-marpit-advanced-background-split]>div[data-marpit-advanced-background-container]{width:var(--marpit-advanced-background-split,50%)}div#\:\$p>svg>foreignObject>section[data-marpit-advanced-background=background][data-marpit-advanced-background-split=right]>div[data-marpit-advanced-background-container]{margin-left:calc(100% - var(--marpit-advanced-background-split, 50%))}div#\:\$p>svg>foreignObject>section[data-marpit-advanced-background=background]>div[data-marpit-advanced-background-container]>figure{all:initial;background-position:center;background-repeat:no-repeat;background-size:cover;flex:auto;margin:0}div#\:\$p>svg>foreignObject>section[data-marpit-advanced-background=background]>div[data-marpit-advanced-background-container]>figure>figcaption{position:absolute;border:0;clip:rect(0,0,0,0);height:1px;margin:-1px;overflow:hidden;padding:0;white-space:nowrap;width:1px}div#\:\$p>svg>foreignObject>section[data-marpit-advanced-background=content],div#\:\$p>svg>foreignObject>section[data-marpit-advanced-background=pseudo]{background:transparent!important}div#\:\$p>svg>foreignObject>section[data-marpit-advanced-background=pseudo],div#\:\$p>svg[data-marpit-svg]>foreignObject[data-marpit-advanced-background=pseudo]{pointer-events:none!important}div#\:\$p>svg>foreignObject>section[data-marpit-advanced-background-split]{width:100%;height:100%}
+

Eggstrain


Authors: Connor, Kyle, Sarvesh

Vectorized Push-Based inspired Execution Engine

-
+

Overview

We will be taking heavy inspiration from:

-
+

Our Design Goals

    -
  • Robustness
  • -
  • Modularity
  • -
  • Extensibility
  • -
  • Forward Compatibility
  • +
  • Robustness
  • +
  • Modularity
  • +
  • Extensibility
  • +
  • Forward Compatibility
-
+

Features

    -
  • Encode behavior in the type system
  • -
  • Provide bare minimum statistics the optimizer needs +
  • Encode behavior in the type system
  • +
  • Provide bare minimum statistics the optimizer needs
      -
    • Timing
    • -
    • Cardinality
    • +
    • Timing
    • +
    • Cardinality
-
+

List of rust crates we plan to use

    -
  • arrow: for handling the Apache Arrow format
  • -
  • tokio: high performance async runtime
  • -
  • rayon: data parallelism crate
  • -
  • anyhow: ergonomic Error handling
  • +
  • arrow: for handling the Apache Arrow format
  • +
  • tokio: high performance async runtime
  • +
  • rayon: data parallelism crate
  • +
  • datafusion: for the input of physical plans
-
+

Design Rationale

Push vs Pull Based

@@ -85,121 +85,74 @@

Design Rationale

-
+

Step 1: Finalize Interfaces

Finalize API with other teams:

    -
  • I/O Service
  • -
  • Catalog
  • -
  • Scheduler
  • +
  • I/O Service
  • +
  • Catalog
  • +
  • Scheduler
-
-

Step 2: Buffer Pool Manager

- -

Need to spill the data to local disk.

+
+

Step 2: Implement operators in memory

    -
  • Can potentially rip out the memory_pool
  • +
  • TableScan
  • +
  • Filter (Completed)
  • +
  • Projection (Completed)
  • +
  • HashAggregation (In-Progress)
  • +
  • HashProbe + HashBuild (In-Progress)
  • +
  • OrderBy (Completed)
  • +
  • TopN (Completed)
  • +
  • Exchange
  • +
  • More may be added as a stretch goal.
-
-

Step 3: Implement operators

+
+

Step 3: Buffer Pool Manager

+ +

Need to spill the data to local disk.

    -
  • TableScan
  • -
  • FilterProject
  • -
  • HashAggregation
  • -
  • HashProbe + HashBuild
  • -
  • MergeJoin
  • -
  • NestedLoopJoin
  • -
  • OrderBy
  • -
  • TopN
  • -
  • Limit
  • -
  • Values
  • -
  • More may be added as a stretch goal.
  • +
  • Can potentially rip out the memory_pool
-
+

Final Design

-
+

Testing

    -
  • Unit tests for each operator
  • -
  • Timing each operator's performance to benchmark our code
  • +
  • Unit tests for each operator
  • +
  • Timing each operator's performance to benchmark our code
-
+

For the sake of code quality...

    -
  • Pair programming (all combinations: KC, KS, CS)
  • -
  • Unit testing for each operator
  • -
  • Integrated tests across mutliple operators
  • +
  • Pair programming (all combinations: KC, KS, CS)
  • +
  • Unit testing for each operator
  • +
  • Integrated tests across mutliple operators
-
+
+

Example Operator Workflow

+
+

Goals

    -
  • 75%: First 7 operators working + integration with other components
  • -
  • 100%: All operators listed above working
  • -
  • 125%: TPC-H benchmark working
  • +
  • 75%: First 7 operators working + integration with other components
  • +
  • 100%: All operators listed above working
  • +
  • 125%: TPC-H benchmark working
-
+

Stretch Goal

    -
  • Integrating with a DBMS
  • -
  • Testing against TPC-H or TPC-H like workload
  • -
  • Add a lot of statistics and timers to each operator (for optimizer's sake)
  • +
  • Integrating with a DBMS
  • +
  • Testing against TPC-H or TPC-H like workload
  • +
  • Add a lot of statistics and timers to each operator (for optimizer's sake)
-
-

Potential StorageClient API

-
/// Will probably end up re-exporting this type:
-pub type SendableRecordBatchStream =
-    Pin<Box<
-        dyn RecordBatchStream<Item =
-            Result<RecordBatch, DataFusionError>
-        > + Send
-    >>;
-
-impl StorageClient {
-    /// Have some sort of way to create a `StorageClient` on our local node.
-    pub fn new(_id: usize) -> Self {
-        Self
-    }
-
-    pub async fn request_data(
-        &self,
-        _request: BlobData,
-    ) -> SendableRecordBatchStream {
-        todo!()
-    }
-}
-
-
-
-

Example usage of the storage client

-
#[tokio::main]
-async fn main() -> anyhow::Result<()> {
-    // Initialize a storage client
-    let sc = storage_client::StorageClient::new(42);
-
-    // Formualte a request we want to make to the storage client
-    let request = create_column_request();
-
-    // Request data from the storage client
-    // Note that this request could fail
-    let stream = sc.request_data(request).await?;
-
-    // Executor node returns a future containing
-    // another stream that can be sent to another operator
-    let table_scan_node = operators::TableScan::new();
-    let result = table_scan_node.execute_with_stream(stream);
-
-    Ok(())
-}
-
-
\ No newline at end of file diff --git a/proposal/presentation.md b/proposal/presentation.md index 5823a1c..8250848 100644 --- a/proposal/presentation.md +++ b/proposal/presentation.md @@ -5,7 +5,7 @@ class: invert # Remove this line for light mode paginate: true --- -# Execution Engine: Eggstrain +# Eggstrain
@@ -13,210 +13,131 @@ paginate: true Vectorized Push-Based inspired Execution Engine - --- - # Overview We will be taking heavy inspiration from: -* [DataFusion](https://arrow.apache.org/datafusion/) -* [Velox](https://velox-lib.io/) -* [InfluxDB](https://github.com/influxdata/influxdb) - * which is built on top of DataFusion +- [DataFusion](https://arrow.apache.org/datafusion/) +- [Velox](https://velox-lib.io/) +- [InfluxDB](https://github.com/influxdata/influxdb) + - which is built on top of DataFusion --- - # Our Design Goals ![bg right:50% 120%](./images/robustness.png) -* Robustness -* Modularity -* Extensibility -* Forward Compatibility - +- Robustness +- Modularity +- Extensibility +- Forward Compatibility --- - # Features -* Encode behavior in the type system -* Provide bare minimum statistics the optimizer needs - * Timing - * Cardinality - +- Encode behavior in the type system +- Provide bare minimum statistics the optimizer needs + - Timing + - Cardinality --- - # List of rust crates we plan to use -* `arrow`: for handling the Apache Arrow format -* `tokio`: high performance `async` runtime -* `rayon`: data parallelism crate -* `anyhow`: ergonomic `Error` handling - +- `arrow`: for handling the Apache Arrow format +- `tokio`: high performance `async` runtime +- `rayon`: data parallelism crate +- `datafusion`: for the input of physical plans --- - # Design Rationale Push vs Pull Based -| Push | Pull | -| --- | --- | -| Improves cache efficiency by removing control flow logic | Easier to implement | -| Forking is efficient: You push a thing only once | Operators like LIMIT make their producers aware of when to stop running (Headache for the optimizer) | -| Parallelization is easier | Parallelization is harder | - +| Push | Pull | +| -------------------------------------------------------- | ---------------------------------------------------------------------------------------------------- | +| Improves cache efficiency by removing control flow logic | Easier to implement | +| Forking is efficient: You push a thing only once | Operators like LIMIT make their producers aware of when to stop running (Headache for the optimizer) | +| Parallelization is easier | Parallelization is harder | --- - # Step 1: Finalize Interfaces Finalize API with other teams: -* I/O Service -* Catalog -* Scheduler - +- I/O Service +- Catalog +- Scheduler --- +# Step 2: Implement operators in memory -# Step 2: Buffer Pool Manager - -![bg right:50% 80%](./images/bufferpool.png) - -Need to spill the data to local disk. This will be done after the first operators are implemented. - -* Can potentially rip out the [`memory_pool`](https://docs.rs/datafusion/latest/datafusion/execution/memory_pool/index.html) - +- TableScan +- Filter (Completed) +- Projection (Completed) +- HashAggregation (In-Progress) +- HashProbe + HashBuild (In-Progress) +- OrderBy (Completed) +- TopN (Completed) +- Exchange +- More may be added as a stretch goal. --- +# Step 3: Buffer Pool Manager -# Step 3: Implement operators +![bg right:50% 80%](./images/bufferpool.png) -* TableScan -* Filter (Completed) -* Projection (Completed) -* HashAggregation (In-Progress) -* HashProbe + HashBuild (In-Progress) -* MergeJoin -* NestedLoopJoin -* OrderBy -* TopN -* Limit -* Values -* More may be added as a stretch goal. +Need to spill the data to local disk. This will be done after the first operators are implemented. +- Can potentially rip out the [`memory_pool`](https://docs.rs/datafusion/latest/datafusion/execution/memory_pool/index.html) --- - # Final Design ![bg right:70% 100%](./images/architecture.drawio.svg) - --- - # Testing -* Unit tests for each operator -* Timing each operator's performance to benchmark our code +- Unit tests for each operator +- Timing each operator's performance to benchmark our code --- - # For the sake of code quality... -* Pair programming (all combinations: KC, KS, CS) -* Unit testing for each operator -* Integrated tests across mutliple operators - - ---- - - -# Goals - -* 75%: First 7 operators working + integration with other components -* 100%: All operators listed above working -* 125%: TPC-H benchmark working - +- Pair programming (all combinations: KC, KS, CS) +- Unit testing for each operator +- Integrated tests across mutliple operators --- +# Example Operator Workflow -# Stretch Goal - -* Integrating with a DBMS -* Testing against TPC-H or TPC-H like workload -* Add a lot of statistics and timers to each operator (for optimizer's sake) - +![bg right:70% 80%](./images/hashjoin.svg) --- +# Goals -# Potential `StorageClient` API - -```rust -/// Will probably end up re-exporting this type: -pub type SendableRecordBatchStream = - Pin - > + Send - >>; - -impl StorageClient { - /// Have some sort of way to create a `StorageClient` on our local node. - pub fn new(_id: usize) -> Self { - Self - } - - pub async fn request_data( - &self, - _request: BlobData, - ) -> SendableRecordBatchStream { - todo!() - } -} -``` - +- 75%: First 7 operators working + integration with other components +- 100%: All operators listed above working +- 125%: TPC-H benchmark working --- +# Stretch Goal -# Example usage of the storage client - -```rust -#[tokio::main] -async fn main() -> anyhow::Result<()> { - // Initialize a storage client - let sc = storage_client::StorageClient::new(42); - - // Formualte a request we want to make to the storage client - let request = create_column_request(); - - // Request data from the storage client - // Note that this request could fail - let stream = sc.request_data(request).await?; - - // Executor node returns a future containing - // another stream that can be sent to another operator - let table_scan_node = operators::TableScan::new(); - let result = table_scan_node.execute_with_stream(stream); - - Ok(()) -} -``` +- Integrating with a DBMS +- Testing against TPC-H or TPC-H like workload +- Add a lot of statistics and timers to each operator (for optimizer's sake) diff --git a/proposal/presentation.pdf b/proposal/presentation.pdf index 76447cb..f1538ae 100644 Binary files a/proposal/presentation.pdf and b/proposal/presentation.pdf differ diff --git a/queries/filter_project_sort.sql b/queries/filter_project_sort.sql new file mode 100644 index 0000000..984dce1 --- /dev/null +++ b/queries/filter_project_sort.sql @@ -0,0 +1,5 @@ +SELECT orders.o_totalprice, orders.o_orderdate +FROM orders +WHERE + orders.o_totalprice < 900.00 +ORDER BY orders.o_totalprice; \ No newline at end of file