Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions backend/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -405,6 +405,35 @@ def get_courses_batch(request: Request, codes: List[str]):
finally:
conn.close()

@app.get("/courses/{course_code}/instructors")
@limiter.limit("60/minute")
def get_course_instructors(request: Request, course_code: str):
    """Return the instructors recorded for a course, grouped by term.

    The path value is upper-cased and hyphens are turned into spaces before
    it is validated against ``COURSE_CODE_RE``.

    Args:
        request: The incoming request. It is not read in this body;
            presumably the rate-limit decorator requires it (confirm
            against the limiter's documentation).
        course_code: Course code taken from the URL path.

    Returns:
        A list of ``{"term": <term>, "instructors": [<name>, ...]}`` dicts.
        Terms appear in the order the query returns them (``ORDER BY term``),
        and names within a term are sorted by the same query.

    Raises:
        HTTPException: 400 when the normalized code does not match
            ``COURSE_CODE_RE``; 500 when the lookup fails for any reason.
    """
    code = course_code.upper().replace("-", " ")
    if not COURSE_CODE_RE.match(code):
        raise HTTPException(status_code=400, detail="Invalid course code")
    conn = get_connection()
    try:
        # The context manager closes the cursor even when execute()/fetchall()
        # raises, instead of only on the success path.
        with conn.cursor() as cur:
            cur.execute("""
                SELECT DISTINCT term, instructor_name
                FROM course_instructors
                WHERE course_code = %s
                ORDER BY term, instructor_name
            """, (code,))
            rows = cur.fetchall()
        # Group by term; dict insertion order keeps the query's term ordering.
        terms: dict = {}
        for term, name in rows:
            terms.setdefault(term, []).append(name)
        return [{"term": t, "instructors": names} for t, names in terms.items()]
    except Exception as e:
        logger.error("GET /courses/%s/instructors failed: %s", course_code, e, exc_info=True)
        raise HTTPException(status_code=500, detail="Internal server error")
    finally:
        conn.close()


if __name__ == "__main__":
    # Script entry point: serve the app with uvicorn when this file is run
    # directly. The import is local, so it only happens on this path.
    import uvicorn
    # host "0.0.0.0" listens on every IPv4 interface; port 8000.
    uvicorn.run(app, host="0.0.0.0", port=8000)
Expand Down
192 changes: 192 additions & 0 deletions backend/timetable_scraper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
"""
Scrapes Carleton Central timetable for instructor names per course per term.
Populates the course_instructors table.

Usage: python3 timetable_scraper.py
"""

import os, re, time, requests
from psycopg2.extras import execute_values
from db import get_connection
from dotenv import load_dotenv

load_dotenv()

# Prefix that the endpoint names used in this module are appended to.
BASE_URL = "https://central.carleton.ca/prod"
# (term_code, term_label) pairs to scrape. The code is what gets posted to the
# site; the label is what ends up in the scraped (course, term, instructor) rows.
TERMS = [
    ("202630", "Fall 2026"),
    ("202710", "Winter 2027"),
]

def get_subject_codes(session: requests.Session, term_code: str, session_id: str,
                      *, timeout: float = 30.0) -> list[str]:
    """Return the subject codes listed on the timetable search page.

    Posts the term selection to ``bwysched.p_search_fields`` and collects
    every ``<option value="...">`` whose value is 2-5 upper-case letters.

    Args:
        session: HTTP session to send the request with.
        term_code: Term code to post (e.g. ``"202630"``).
        session_id: Session id previously obtained from the site.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the scraper forever.

    Returns:
        Subject codes in page order, each listed once.

    Raises:
        requests.HTTPError: If the server answers with an error status, so an
            error page is not silently treated as "no subjects".
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    resp = session.post(
        f"{BASE_URL}/bwysched.p_search_fields",
        data={
            "wsea_code": "EXT",
            "term_code": term_code,
            "session_id": session_id,
        },
        timeout=timeout,
    )
    resp.raise_for_status()
    codes = re.findall(r'<option value="([A-Z]{2,5})"', resp.text)
    # dict.fromkeys drops repeats while keeping first-seen order, so a subject
    # listed twice on the page is not scraped twice.
    return list(dict.fromkeys(codes))


def get_session_id(session: requests.Session, term_code: str,
                   *, timeout: float = 30.0) -> str:
    """Obtain a session id from the term-selection page and select a term.

    Fetches ``bwysched.p_select_term``, extracts the numeric ``session_id``
    form value from the response, then posts the term selection to
    ``bwysched.p_search_fields`` to advance the session.

    Args:
        session: HTTP session to send the requests with.
        term_code: Term code to select (e.g. ``"202630"``).
        timeout: Seconds to wait for each request before giving up.

    Returns:
        The session id as a string of digits.

    Raises:
        RuntimeError: If no ``session_id`` value is found in the page.
        requests.HTTPError: If either request gets an error status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    resp = session.get(
        f"{BASE_URL}/bwysched.p_select_term?wsea_code=EXT",
        timeout=timeout,
    )
    resp.raise_for_status()
    found = re.search(r'session_id" value="(\d+)"', resp.text)
    if not found:
        raise RuntimeError("Could not get session_id")
    session_id = found.group(1)
    # Now post the term selection to advance the session
    advance = session.post(
        f"{BASE_URL}/bwysched.p_search_fields",
        data={
            "wsea_code": "EXT",
            "term_code": term_code,
            "session_id": session_id,
        },
        timeout=timeout,
    )
    advance.raise_for_status()
    return session_id


# One "dummy" placeholder per search-form field. The search request built in
# scrape_subject carries a "dummy" value for each of these keys ahead of the
# real values. NOTE(review): presumably the site's form handler expects a
# leading placeholder for every field — confirm against the live form.
DUMMY_FIELDS = {
    "sel_aud": "dummy", "sel_subj": "dummy", "sel_camp": "dummy",
    "sel_sess": "dummy", "sel_attr": "dummy", "sel_levl": "dummy",
    "sel_schd": "dummy", "sel_insm": "dummy", "sel_link": "dummy",
    "sel_wait": "dummy", "sel_day": "dummy", "sel_begin_hh": "dummy",
    "sel_begin_mi": "dummy", "sel_begin_am_pm": "dummy",
    "sel_end_hh": "dummy", "sel_end_mi": "dummy",
    "sel_end_am_pm": "dummy", "sel_instruct": "dummy",
    "sel_special": "dummy", "sel_resd": "dummy", "sel_breadth": "dummy",
}

# Real filter values: six day codes, a begin time of 0:00 "a" and an end time
# of 23:59 "p", and level "UG". "sel_day" holds a list because that key is
# sent once per day code.
DAY_FIELDS = {
    "sel_day": ["m", "t", "w", "r", "f", "s"],
    "sel_begin_hh": "0", "sel_begin_mi": "0", "sel_begin_am_pm": "a",
    "sel_end_hh": "23", "sel_end_mi": "59", "sel_end_am_pm": "p",
    "sel_levl": "UG",
}


def parse_instructors(html: str, subj: str, term_label: str) -> list[tuple]:
    """Extract (course_code, term, instructor_name) tuples from a results page.

    The page is split on ``<tr`` and each chunk is treated as one table row.
    A row is used only if it contains ``"<subj> <4 digits + optional letter>"``
    followed by ``</font>`` and a ``Lecture`` or ``Seminar`` cell, so that
    tutorial/lab sections do not duplicate the instructor.

    Args:
        html: Raw HTML of the course-search response.
        subj: Subject code the search was run for (e.g. ``"COMP"``).
        term_label: Label stored as the term in every returned tuple.

    Returns:
        Unique ``(course_code, term_label, instructor)`` tuples in page order.
        HTML character references in the instructor name (``&#39;``,
        ``&eacute;``, ...) are decoded, so names are stored as plain text.
    """
    # The parameter is called ``html``, so import the function, not the module.
    from html import unescape

    # Compile once per call instead of once per row.
    code_re = re.compile(rf'{re.escape(subj)}\s+(\d{{4}}[A-Z]?)</font>')
    type_re = re.compile(r'<td[^>]*>(Lecture|Seminar)</td>')
    plain_td_re = re.compile(r'<td[^>]*>\s*([A-Za-z][^<\n]{2,50}?)\s*</td>')
    # Cell texts that can land in the last plain cell but are not a person.
    not_a_name = frozenset((
        'tba', 'staff', 'lecture', 'tutorial', 'lab', 'seminar',
        'yes', 'no', 'open', 'closed',
    ))

    results = []
    seen = set()
    for row in re.split(r'<tr\b', html):
        code_m = code_re.search(row)
        if not code_m or not type_re.search(row):
            continue
        # Instructor is the last plain-text td (no markup inside the cell).
        plain_tds = plain_td_re.findall(row)
        if not plain_tds:
            continue
        raw = plain_tds[-1].strip()
        # Checked on the raw text, before decoding turns &nbsp; into U+00A0.
        if not raw or '&nbsp;' in raw:
            continue
        instructor = unescape(raw).strip()
        if not instructor or instructor.lower() in not_a_name:
            continue
        key = (f"{subj} {code_m.group(1)}", term_label, instructor)
        if key not in seen:
            seen.add(key)
            results.append(key)
    return results


def scrape_subject(session: requests.Session, term_code: str, term_label: str,
                   session_id: str, subj: str,
                   *, timeout: float = 30.0) -> list[tuple]:
    """Run the course search for one subject and parse its instructors.

    The form repeats keys (a ``"dummy"`` entry and then the real value for
    the same field, plus one ``sel_day`` per day), so the payload is a list
    of ``(key, value)`` tuples rather than a dict, which would collapse the
    repeats. Order is: session fields, every ``DUMMY_FIELDS`` entry, the
    subject and course number, then the ``DAY_FIELDS`` values with list
    values expanded into one pair per item.

    Args:
        session: HTTP session to send the request with.
        term_code: Term code to search in.
        term_label: Label passed through to the parsed tuples.
        session_id: Session id previously obtained from the site.
        subj: Subject code to search for.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        Whatever ``parse_instructors`` extracts from the response body.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    payload: list[tuple[str, str]] = [
        ("wsea_code", "EXT"),
        ("term_code", term_code),
        ("session_id", session_id),
        ("ws_numb", ""),
        *DUMMY_FIELDS.items(),
        ("sel_subj", subj),
        ("sel_number", ""),
    ]
    for key, value in DAY_FIELDS.items():
        # A list value means the key is sent once per item (e.g. sel_day).
        values = value if isinstance(value, list) else [value]
        payload.extend((key, item) for item in values)

    resp = session.post(
        f"{BASE_URL}/bwysched.p_course_search",
        data=payload,
        timeout=timeout,
    )
    resp.raise_for_status()
    return parse_instructors(resp.text, subj, term_label)


def create_table(conn):
    """Create the ``course_instructors`` table if it is missing, then commit.

    Args:
        conn: Open database connection; it is left open for the caller.
    """
    ddl = """
        CREATE TABLE IF NOT EXISTS course_instructors (
            id SERIAL PRIMARY KEY,
            course_code TEXT NOT NULL,
            term TEXT NOT NULL,
            instructor_name TEXT NOT NULL,
            UNIQUE(course_code, term, instructor_name)
        )
    """
    with conn.cursor() as cursor:
        cursor.execute(ddl)
    conn.commit()


def upsert_rows(conn, rows: list[tuple]):
    """Insert scraped rows into ``course_instructors`` and commit.

    Rows that collide with an existing (course_code, term, instructor_name)
    are skipped by the ``ON CONFLICT ... DO NOTHING`` clause.

    Args:
        conn: Open database connection.
        rows: ``(course_code, term, instructor_name)`` tuples; an empty list
            returns immediately without touching the connection.
    """
    if rows:
        insert_sql = """
            INSERT INTO course_instructors (course_code, term, instructor_name)
            VALUES %s
            ON CONFLICT (course_code, term, instructor_name) DO NOTHING
        """
        with conn.cursor() as cursor:
            execute_values(cursor, insert_sql, rows)
        conn.commit()


def main():
    """Scrape every term in ``TERMS`` and store the instructors found.

    For each term a session id and the subject list are fetched, then each
    subject is scraped and its rows written to the database. A failure on
    one subject is reported and the run continues with the next subject;
    a failure while setting up a term propagates to the caller. The database
    connection and HTTP session are closed on every exit path.
    """
    conn = get_connection()
    try:
        create_table(conn)

        with requests.Session() as http:
            http.headers.update({"User-Agent": "CarletonCourseMap/1.0 (student project)"})

            total = 0
            for term_code, term_label in TERMS:
                print(f"\n=== {term_label} ===")
                session_id = get_session_id(http, term_code)
                subjects = get_subject_codes(http, term_code, session_id)
                print(f"Found {len(subjects)} subjects")

                for subj in subjects:
                    try:
                        rows = scrape_subject(http, term_code, term_label, session_id, subj)
                        if rows:
                            upsert_rows(conn, rows)
                            print(f" {subj}: {len(rows)} sections")
                            # Counts rows scraped; rows that already exist are
                            # skipped by the insert's ON CONFLICT clause.
                            total += len(rows)
                    except Exception as e:
                        # A failed statement leaves the transaction aborted;
                        # roll back so the next subject's insert can succeed.
                        conn.rollback()
                        print(f" {subj}: ERROR {e}")
                    finally:
                        # be polite, after failures as well as successes
                        time.sleep(0.4)

            print(f"\nDone. {total} total rows inserted.")
    finally:
        conn.close()


if __name__ == "__main__":
    main()
46 changes: 46 additions & 0 deletions frontend/app/api/rmp/route.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import { RMPClient } from 'ratemyprofessors-client'
import { NextResponse } from 'next/server'

const CARLETON_SCHOOL_ID = '1420'
const client = new RMPClient()

// Cache results in memory for the lifetime of the server process.
// Keys are caller-supplied text, so the number of entries is capped to stop
// arbitrary query strings from growing the Map without bound.
const MAX_CACHE_ENTRIES = 500
const cache = new Map()

// Store a lookup result, evicting the oldest entry once the cap is reached.
function cacheResult(key, value) {
  if (!cache.has(key) && cache.size >= MAX_CACHE_ENTRIES) {
    // A Map iterates in insertion order, so its first key is the oldest one.
    cache.delete(cache.keys().next().value)
  }
  cache.set(key, value)
}

/**
 * Look up a professor by the `name` query parameter.
 *
 * Responds with 400 when `name` is missing or blank. Otherwise responds with
 * `{ found: true, ...details }` for the first search result whose school id
 * equals CARLETON_SCHOOL_ID, or `{ found: false }` when there is none.
 * A failed lookup is logged and answered with `{ found: false }`; that
 * answer is not cached, so a later request retries the lookup.
 */
export async function GET(request) {
  const { searchParams } = new URL(request.url)
  const name = searchParams.get('name')?.trim()

  if (!name) {
    return NextResponse.json({ error: 'name is required' }, { status: 400 })
  }

  if (cache.has(name)) {
    return NextResponse.json(cache.get(name))
  }

  try {
    const result = await client.searchProfessors(name, CARLETON_SCHOOL_ID)
    // Pick the first result that belongs to Carleton
    const prof = result.professors?.find(p => p.school?.id === CARLETON_SCHOOL_ID) ?? null

    let data = { found: false }
    if (prof) {
      const takeAgain = prof.percent_take_again
      data = {
        found: true,
        name: prof.name,
        department: prof.department,
        overall_rating: prof.overall_rating,
        difficulty: prof.level_of_difficulty,
        num_ratings: prof.num_ratings,
        // `null >= 0` is true in JavaScript, so a missing value must be
        // excluded explicitly or it would be reported as 0%.
        // NOTE(review): a negative value presumably means "no data" — confirm.
        would_take_again: takeAgain != null && takeAgain >= 0 ? Math.round(takeAgain) : null,
        rmp_url: `https://www.ratemyprofessors.com/professor/${prof.id}`,
      }
    }

    cacheResult(name, data)
    return NextResponse.json(data)
  } catch (err) {
    console.error('RMP lookup failed for', name, err)
    return NextResponse.json({ found: false })
  }
}
Loading
Loading