Skip to content

Commit

Permalink
new example join cascade.py
Browse files — browse the repository at this point in the history
  • Loading branch information
liana313 committed Nov 18, 2024
1 parent b786163 commit f161ca7
Showing 1 changed file with 6 additions and 93 deletions.
99 changes: 6 additions & 93 deletions examples/op_examples/join_cascade.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -15,81 +15,6 @@
"Data Structures and Algorithms",
"Artificial Intelligence",
"Natural Language Processing",
"Introduction to Robotics",
"Linear Algebra and Differential Equations",
"Database Systems",
"Cloud Computing",
"Software Engineering",
"Operating Systems",
"Discrete Mathematics",
"Wireless Communication Systems",
"Embedded Systems",
"Advanced Computer Architecture",
"Cryptography and Network Security",
"Big Data Analytics",
"Environmental Studies",
"Genetics and Evolution",
"Human Physiology",
"Introduction to Anthropology",
"Macroeconomics",
"Microeconomics",
"Introduction to Sociology",
"Developmental Psychology",
"Cognitive Science",
"Introduction to Philosophy",
"Ethics and Moral Philosophy",
"History of Western Civilization",
"Art History: Renaissance to Modern",
"World Literature",
"Introduction to Journalism",
"Public Speaking and Communication",
"Creative Writing",
"Introduction to Theater",
"Film Studies",
"Environmental Policy and Law",
"Sustainability and Renewable Energy",
"Urban Planning and Design",
"International Relations",
"Marketing Principles",
"Organizational Behavior",
"Financial Accounting",
"Corporate Finance",
"Operations Research",
"Entrepreneurship and Innovation",
"Introduction to Psychology",
"Biostatistics",
"Social Work Practice",
"Public Health Policy",
"Environmental Ethics",
"History of Political Thought",
"Quantitative Research Methods",
"Comparative Politics",
"Behavioral Economics",
"Sociology of Education",
"Social Psychology",
"Gender Studies",
"Media and Communication Studies",
"Advertising and Brand Strategy",
"Sports Management",
"Introduction to Archaeology",
"Ecology and Conservation Biology",
"Geology and Earth Science",
"Astronomy and Astrophysics",
"Introduction to Meteorology",
"Introduction to Oceanography",
"Introduction to Civil Engineering",
"Material Science and Engineering",
"Structural Engineering",
"Environmental Engineering",
"Energy Systems Engineering",
"Aerodynamics",
"Renewable Energy Systems",
"Transportation Engineering",
"Water Resources Management",
"Principles of Accounting",
"Project Management",
"International Business",
"Business Analytics",
]
}

Expand All @@ -110,29 +35,17 @@
"Circuit Design", "Robotics", "Environmental Science", "Marine Biology", "Urban Planning", "Geography",
"Agricultural Science", "Animal Care", "Veterinary Science", "Zoology", "Ecology", "Botany", "Landscape Design",
"Baking & Pastry", "Culinary Arts", "Bartending", "Nutrition", "Dietary Planning", "Physical Training", "Yoga",
"Meditation", "Dance", "Music Production", "Audio Engineering", "Voice Acting", "Acting", "Film Production",
"Directing", "Screenwriting", "Set Design", "Costume Design", "Stage Management", "Sound Design", "Lighting Design",
"History", "Art History", "Philosophy", "Religious Studies", "Economics", "Statistics", "Calculus", "Algebra",
"Geometry", "Trigonometry", "Early Childhood Education", "Special Education", "Teaching", "Curriculum Development",
"Educational Psychology", "Instructional Design", "Library Science", "Museum Studies", "Archaeology",
"Content Strategy", "Branding", "Product Design", "Industrial Design", "Supply Chain Analysis", "Manufacturing",
"Logistics", "Warehouse Management", "Inventory Management", "Risk Management", "Compliance", "Auditing",
"Portfolio Management", "Investment Analysis", "Real Estate", "Insurance", "Claims Processing", "Underwriting",
"Tax Preparation", "Financial Planning", "Estate Planning", "Business Analysis", "Data Mining", "Big Data",
"Natural Language Processing", "Speech Recognition", "Machine Vision", "Bioinformatics", "Chemoinformatics",
"Geoinformatics", "Energy Management", "Construction Management", "Property Management", "Facility Management",
"Hotel Management", "Travel Planning", "Event Coordination", "Salesforce", "Customer Relationship Management (CRM)",
"SAP", "Oracle", "Microsoft Excel", "Microsoft Word", "Microsoft PowerPoint", "Microsoft Outlook", "Google Sheets",
"Google Docs", "Slack", "Trello", "JIRA", "Confluence", "Asana", "Adobe Photoshop", "Adobe Illustrator",
"Adobe Premiere Pro", "Adobe After Effects", "CorelDRAW", "Figma", "Sketch", "Canva", "Final Cut Pro", "Unity",
"Unreal Engine", "Game Design", "Game Development", "Simulation Modeling", "Virtual Reality", "Augmented Reality",
"Quantum Computing", "3D Printing", "Nanotechnology"
]
# Right-hand table for the join: the entries of `skills` under a single
# "Skill" column.
data2 = pd.DataFrame({"Skill": skills})


# Wrap both inputs as DataFrames. `data2` is already a DataFrame at this
# point; `data` is defined earlier in the file (outside this view).
df1 = pd.DataFrame(data)
df2 = pd.DataFrame(data2)
# Natural-language join predicate. The {Course Name:left} / {Skill:right}
# placeholders presumably refer to a "Course Name" column of df1 and the
# "Skill" column of df2 -- confirm against the sem_join documentation.
join_instruction = "Taking {Course Name:left} will help me learn {Skill:right}"
# NOTE(review): this text comes from a rendered diff without +/- markers; the
# header's "6 additions & 93 deletions" count suggests this call is the removed
# line and the statements below replace it -- confirm against the repository.
# As written here, its result is overwritten by the second sem_join call
# before it is ever read.
res = df1.sem_join(df2, join_instruction, recall_target = 0.9, precision_target = 0.9, sampling_percentage = 0.8)
# Report the input sizes and the pairwise count (rows of df1 times rows of df2).
print(f"Joining {df1.shape[0]} rows from df1 with {df2.shape[0]} rows from df2")
print(f"Naive join would require {df1.shape[0]*df2.shape[0]} LM calls")

# Join with recall/precision targets of 0.7 and no explicit
# sampling_percentage argument.
res = df1.sem_join(df2, join_instruction, recall_target = 0.7, precision_target = 0.7)
# Same size report as above, phrased in the past tense, then the join result.
print(f"Joined {df1.shape[0]} rows from df1 with {df2.shape[0]} rows from df2")
print(f"Naive join would require {df1.shape[0]*df2.shape[0]} LM calls")
print(res)

0 comments on commit f161ca7

Please sign in to comment.