Skip to content

Commit

Permalink
using rdds to achieve computation
Browse files Browse the repository at this point in the history
  • Loading branch information
Rizxcviii committed Apr 26, 2022
1 parent 11a99a1 commit 97dcfda
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 2 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# created by virtualenv automatically
env
output
14 changes: 12 additions & 2 deletions main.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,17 @@
from pyspark.sql import SparkSession
from pyspark.sql.functions import col, explode, from_json
from pyspark.sql.types import ArrayType, StringType, StructField, StructType

# Extract (fullUrl, resource id) pairs from FHIR Bundle JSON documents
# stored under ./data and write them to ./output as text.

# Build (or reuse) the shared Spark session for this job.
# NOTE: fixed typo in the original app name ("conversoin" -> "conversion").
spark = SparkSession.builder.appName("FHIR conversion").getOrCreate()

# Read every JSON file under ./data. The "multiline" option allows
# pretty-printed (multi-line) JSON documents instead of one-object-per-line.
df = spark.read.option("multiline", "true").json("./data")

# Quick sanity peek at the parsed schema/rows.
df.show(10)

# Reuse the DataFrame's underlying RDD rather than re-reading ./data a
# second time (the original performed two separate reads of the same path).
documents = df.rdd.map(lambda row: row.asDict())

# Each document is assumed to be a FHIR Bundle with an "entry" list —
# TODO confirm against the input data. Flatten so each record is one entry.
entries = documents.flatMap(lambda doc: doc["entry"])

# Keep only the (fullUrl, resource id) pair from each entry.
required_data = entries.map(lambda entry: (entry["fullUrl"], entry["resource"]["id"]))

# Persist as text files; this fails if ./output already exists
# (standard Hadoop output-committer semantics).
required_data.saveAsTextFile("./output")

0 comments on commit 97dcfda

Please sign in to comment.