Skip to content

Commit

Permalink
Update course_plan_scraper.py
Browse files (browse the repository at this point in the history)
- Added sorting to faculties
  • Loading branch information
AtaTrkgl committed Jan 5, 2025
1 parent 11290ab commit 04403a3
Showing 1 changed file with 9 additions and 3 deletions.
12 changes: 9 additions & 3 deletions src/course_plan_scraper.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -154,8 +154,8 @@ def scrap_course_plans(self):
Logger.log_info("Scraping Course Programs")
t0 = perf_counter() # Start the timer for logging.

with open(PROGRAMME_CODES_FILE_PATH, "r", encoding="utf-8") as f:
programme_codes = [line.strip().split("|") for line in f.readlines()]
with open(PROGRAMME_CODES_FILE_PATH, "r", encoding="utf-8") as ordered_faculty_names:
programme_codes = [line.strip().split("|") for line in ordered_faculty_names.readlines()]

thread_count = min(MAX_THREAD_COUNT, len(programme_codes))
programme_code_chunks = self.split_programme_codes_into_chunks(programme_codes, thread_count)
Expand All @@ -171,7 +171,13 @@ def scrap_course_plans(self):
for t in threads: t.start()
for t in threads: t.join()

# Faculties
ordered_faculty_names = []
for _, __, faculty, ___ in programme_codes:
if faculty not in ordered_faculty_names:
ordered_faculty_names.append(faculty)

# Log how long the process took.
t1 = perf_counter()
Logger.log_info(f"Scraping Course Plans Completed in [green]{round(t1 - t0, 2)}[/green] seconds.")
return self.faculty_course_plans
return {faculty: self.faculty_course_plans[faculty] for faculty in ordered_faculty_names if faculty in self.faculty_course_plans}

0 comments on commit 04403a3

Please sign in to comment.