Week 2 Kevin G #38

Open · wants to merge 4 commits into base: master
2 changes: 1 addition & 1 deletion .gitignore
@@ -152,7 +152,7 @@ cython_debug/
 # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
 # and can be added to the global gitignore or merged into this file. For a more nuclear
 # option (not recommended) you can uncomment the following to ignore the entire idea folder.
-#.idea/
+.idea/
 
 # Mac OS
 .DS_Store
5 changes: 4 additions & 1 deletion week_1/project/config.yaml
@@ -1,4 +1,7 @@
 ops:
   get_s3_data:
     config:
-      s3_key: week_1/data/stock.csv
+      s3_key: week_1/data/stock.csv
+  process_data:
+    config:
+      n: 3
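
The repeated s3_key line is the old line deleted and re-added (4 additions, 1 deletion); the net change is the new process_data block. For reference, this YAML can be fed straight to the job; a minimal sketch, assuming PyYAML is installed and that the job lives at the course's week_1_challenge module path (the import path is an assumption):

# Minimal sketch: load the run config from YAML and execute the job with it.
# The import path follows the course layout and may differ in this repo.
import yaml

from week_1.project.week_1_challenge import week_1_pipeline

with open("week_1/project/config.yaml") as f:
    run_config = yaml.safe_load(f)

result = week_1_pipeline.execute_in_process(run_config=run_config)
assert result.success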
16 changes: 12 additions & 4 deletions week_1/project/week_1.py
@@ -1,4 +1,5 @@
 import csv
+import logging
 from datetime import datetime
 from typing import List
 
@@ -51,15 +52,22 @@ def get_s3_data(context):
 
 
 @op
-def process_data():
-    pass
+def process_data(raw_stocks: List[Stock]) -> Aggregation:
+    highest_stock = max(raw_stocks, key=lambda x: x.high)
+    return Aggregation(date=highest_stock.date, high=highest_stock.high)
 
 
 @op
-def put_redis_data():
+def put_redis_data(aggregation: Aggregation):
+    logging.info(f"Put {aggregation} in redis")
     pass
 
 
 @job
 def week_1_pipeline():
-    pass
+    put_redis_data(process_data(get_s3_data()))
+
+
+week_1_pipeline.execute_in_process(
+    run_config={'ops': {'get_s3_data': {'config': {'s3_key': "week_1/data/stock.csv"}}}}
+)
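
Since process_data takes no context argument, it can be smoke-tested by direct invocation outside a Dagster run; a minimal sketch, where the Stock constructor fields are an assumption based on the course data, not taken from this diff:

# Minimal sketch: call the op directly as a plain function.
# Stock/Aggregation are assumed to be the pydantic models defined alongside
# the ops; the field names and values below are illustrative.
from datetime import datetime

from week_1.project.week_1 import Aggregation, Stock, process_data

stocks = [
    Stock(date=datetime(2022, 1, 1), close=10.0, volume=100, open=9.0, high=12.0, low=8.0),
    Stock(date=datetime(2022, 1, 2), close=11.0, volume=100, open=10.0, high=15.0, low=9.0),
]

assert process_data(stocks) == Aggregation(date=datetime(2022, 1, 2), high=15.0)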
25 changes: 19 additions & 6 deletions week_1/project/week_1_challenge.py
@@ -12,6 +12,7 @@
     job,
     op,
     usable_as_dagster_type,
+    get_dagster_logger,
 )
 from pydantic import BaseModel
 
@@ -60,16 +61,28 @@ def get_s3_data(context):
     return output
 
 
-@op
-def process_data():
-    pass
+@op(out=DynamicOut(), config_schema={"n": int})
+def process_data(context, raw_stocks: List[Stock]) -> DynamicOutput:
+    n = context.op_config["n"]
+    top_stocks = nlargest(n, raw_stocks, key=lambda x: x.high)
+    aggregations = [Aggregation(date=stock.date, high=stock.high) for stock in top_stocks]
+    get_dagster_logger().info(aggregations)
+    for idx, stock in enumerate(aggregations):
+        yield DynamicOutput(stock, mapping_key=str(idx + 1))
 
 
 @op
-def put_redis_data():
-    pass
+def put_redis_data(aggregation: Aggregation):
+    get_dagster_logger().info(f"Put Aggregation {aggregation} in redis")
 
 
 @job
 def week_1_pipeline():
-    pass
+    aggregates = process_data(get_s3_data())
+    aggregates.map(put_redis_data)
+
+
+week_1_pipeline.execute_in_process(
+    run_config={'ops': {
+        'get_s3_data': {'config': {'s3_key': "week_1/data/stock.csv"}},
+        'process_data': {'config': {'n': 3}},
+    }}
+)
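
Note two fixes above: put_redis_data takes an Aggregation argument (the scraped version had the invalid default `aggregation=DynamicOut`), and the run_config dict is now valid Python with process_data nested under 'ops'. For context on the fan-out pattern, each DynamicOutput yielded by process_data becomes its own mapped invocation of put_redis_data; a minimal self-contained sketch of the same mechanics, with illustrative names independent of the course repo:

# Minimal sketch of Dagster's dynamic-output fan-out.
# Each DynamicOutput produces one mapped invocation of the downstream op.
from dagster import DynamicOut, DynamicOutput, job, op


@op(out=DynamicOut())
def emit_numbers():
    for i in range(3):
        # mapping_key must be unique per output; it becomes part of the step key.
        yield DynamicOutput(i, mapping_key=str(i))


@op
def double(n: int) -> int:
    return n * 2


@job
def fan_out_job():
    emit_numbers().map(double)


result = fan_out_job.execute_in_process()
assert result.success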
37 changes: 24 additions & 13 deletions week_2/dagster_ucr/project/week_2.py
@@ -1,30 +1,41 @@
 from typing import List
 
+import logging
 from dagster import In, Nothing, Out, ResourceDefinition, graph, op
 from dagster_ucr.project.types import Aggregation, Stock
 from dagster_ucr.resources import mock_s3_resource, redis_resource, s3_resource
 
 
-@op
-def get_s3_data():
-    pass
+@op(
+    required_resource_keys={'s3'},
+    config_schema={"s3_key": str},
+    out={"stocks": Out(dagster_type=List[Stock])},
+    tags={"kind": "s3"},
+    description="Get a list of stocks from an S3 file")
+def get_s3_data(context):
+    stocks = []
+    for record in context.resources.s3.get_data(context.op_config["s3_key"]):
+        stocks.append(Stock.from_list(record))
+    return stocks
 
 
 @op
-def process_data():
-    # Use your op from week 1
-    pass
+def process_data(raw_stocks: List[Stock]) -> Aggregation:
+    highest_stock = max(raw_stocks, key=lambda x: x.high)
+    return Aggregation(date=highest_stock.date, high=highest_stock.high)
 
 
-@op
-def put_redis_data():
-    pass
+@op(
+    required_resource_keys={'redis'},
+    ins={"aggregation": In(dagster_type=Aggregation)},
+    description="Put Aggregation data into Redis",
+    tags={"kind": "redis"}
+)
+def put_redis_data(context, aggregation: Aggregation):
+    context.resources.redis.put_data(aggregation.date, aggregation.high)
 
 
 @graph
 def week_2_pipeline():
-    # Use your graph from week 1
-    pass
+    put_redis_data(process_data(get_s3_data()))
 
 
 local = {
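
The local dict is truncated in the diff view and is left as-is here. For orientation, a graph in this repo is typically bound to runnable jobs by attaching resource sets; a sketch under assumed names, not the PR's elided code:

# Sketch: bind the week_2_pipeline graph to jobs with concrete resources.
# Names mirror the imports above; the actual job definitions are elided in
# the diff, so this is illustrative only.
week_2_pipeline_local = week_2_pipeline.to_job(
    name="week_2_pipeline_local",
    resource_defs={
        "s3": mock_s3_resource,                      # canned data for local runs
        "redis": ResourceDefinition.mock_resource(), # no-op stand-in
    },
)

week_2_pipeline_docker = week_2_pipeline.to_job(
    name="week_2_pipeline_docker",
    resource_defs={"s3": s3_resource, "redis": redis_resource},
)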
23 changes: 18 additions & 5 deletions week_2/dagster_ucr/resources.py
@@ -91,13 +91,26 @@ def mock_s3_resource():
     return s3_mock
 
 
-@resource
-def s3_resource():
+@resource(
+    config_schema={"bucket": str, "access_key": str, "secret_key": str, "endpoint_url": str},
+    description="S3 resource"
+)
+def s3_resource(context):
     """This resource defines a S3 client"""
-    pass
+    return S3(
+        bucket=context.resource_config["bucket"],
+        access_key=context.resource_config["access_key"],
+        secret_key=context.resource_config["secret_key"],
+        endpoint_url=context.resource_config["endpoint_url"]
+    )
 
 
-@resource
-def redis_resource():
+@resource(
+    config_schema={"host": str, "port": int}, description="Redis resource"
+)
+def redis_resource(context):
     """This resource defines a Redis client"""
+    return Redis(
+        host=context.resource_config["host"],
+        port=context.resource_config["port"])
     pass
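
With config schemas on both resources, runs of the docker job must now supply a resources section alongside ops; a hedged sketch of such a run config, with every value an illustrative placeholder rather than the course's real settings:

# Sketch of a run config exercising both configured resources.
# Endpoint and credential values are placeholders only.
run_config = {
    "resources": {
        "s3": {
            "config": {
                "bucket": "dagster",
                "access_key": "test",
                "secret_key": "test",
                "endpoint_url": "http://localstack:4566",
            }
        },
        "redis": {"config": {"host": "redis", "port": 6379}},
    },
    "ops": {"get_s3_data": {"config": {"s3_key": "prefix/stock.csv"}}},
}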