zaidahmad16 · zaidahmad16 · May 26, 2026 · May 26, 2026
diff --git a/backend/main.py b/backend/main.py
@@ -405,6 +405,35 @@ def get_courses_batch(request: Request, codes: List[str]):
     finally:
         conn.close()
 
+@app.get("/courses/{course_code}/instructors")
+@limiter.limit("60/minute")
+def get_course_instructors(request: Request, course_code: str):
+    """List the instructors recorded for a course, grouped by term.
+
+    The path segment is upper-cased and hyphens are turned into spaces before
+    it is validated against ``COURSE_CODE_RE`` and used in the query.
+
+    Args:
+        request: Incoming request; accepted alongside the ``limiter.limit``
+            decorator.
+        course_code: Course code taken from the URL path.
+
+    Returns:
+        A list of ``{"term": ..., "instructors": [...]}`` objects. Rows are
+        read ordered by term and then instructor name, and that order is
+        kept. A course with no rows yields an empty list.
+
+    Raises:
+        HTTPException: 400 when the normalized code fails validation, 500
+            when the query fails.
+    """
+    code = course_code.upper().replace("-", " ")
+    if not COURSE_CODE_RE.match(code):
+        raise HTTPException(status_code=400, detail="Invalid course code")
+    conn = get_connection()
+    try:
+        # The context manager closes the cursor even when the query raises.
+        with conn.cursor() as cur:
+            cur.execute("""
+                SELECT DISTINCT term, instructor_name
+                FROM course_instructors
+                WHERE course_code = %s
+                ORDER BY term, instructor_name
+            """, (code,))
+            rows = cur.fetchall()
+        # Group by term; dicts keep insertion order, so the SQL ordering
+        # carries through to the response.
+        terms: dict = {}
+        for term, name in rows:
+            terms.setdefault(term, []).append(name)
+        return [{"term": t, "instructors": names} for t, names in terms.items()]
+    except Exception as e:
+        logger.error("GET /courses/%s/instructors failed: %s", course_code, e, exc_info=True)
+        raise HTTPException(status_code=500, detail="Internal server error")
+    finally:
+        conn.close()
+
+
+# When this file is executed directly, serve the app with uvicorn on all
+# interfaces (0.0.0.0), port 8000.
 if __name__ == "__main__":
     import uvicorn
     uvicorn.run(app, host="0.0.0.0", port=8000)

diff --git a/backend/timetable_scraper.py b/backend/timetable_scraper.py
@@ -0,0 +1,192 @@
+"""
+Scrapes Carleton Central timetable for instructor names per course per term.
+Populates the course_instructors table.
+
+Usage: python3 timetable_scraper.py
+"""
+
+import os, re, time, requests
+from psycopg2.extras import execute_values
+from db import get_connection
+from dotenv import load_dotenv
+
+load_dotenv()
+
+# Root URL of the timetable endpoints; each request appends a handler name.
+BASE_URL = "https://central.carleton.ca/prod"
+# (term_code, label) pairs to scrape. The code is submitted as the form's
+# ``term_code``; the label is what ends up in course_instructors.term.
+TERMS = [
+    ("202630", "Fall 2026"),
+    ("202710", "Winter 2027"),
+]
+
+def get_subject_codes(session: requests.Session, term_code: str, session_id: str) -> list[str]:
+    """Return the subject codes listed on the search page for a term.
+
+    Posts the term selection to the search-fields page and collects every
+    ``<option>`` value made of 2-5 capital letters.
+    NOTE(review): that pattern may also match non-subject options of the same
+    shape — confirm against the live page.
+
+    Args:
+        session: HTTP session used to send the request.
+        term_code: Term code submitted as ``term_code``.
+        session_id: Session id submitted as ``session_id``.
+
+    Returns:
+        Subject codes in page order, without duplicates.
+
+    Raises:
+        requests.RequestException: On timeout, connection failure or an HTTP
+            error status.
+    """
+    resp = session.post(
+        f"{BASE_URL}/bwysched.p_search_fields",
+        data={
+            "wsea_code": "EXT",
+            "term_code": term_code,
+            "session_id": session_id,
+        },
+        timeout=30,  # requests waits indefinitely unless a timeout is given
+    )
+    # Fail loudly instead of parsing an error page into "0 subjects".
+    resp.raise_for_status()
+    codes = re.findall(r'<option value="([A-Z]{2,5})"', resp.text)
+    # dict.fromkeys drops repeats while keeping first-seen order, so a code
+    # listed twice is not scraped twice.
+    return list(dict.fromkeys(codes))
+
+
+def get_session_id(session: requests.Session, term_code: str) -> str:
+    """Open a timetable search session and select a term in it.
+
+    Loads the term-selection page, reads the ``session_id`` value embedded in
+    it, then posts the chosen term with that id.
+
+    Args:
+        session: HTTP session used for both requests.
+        term_code: Term code to select.
+
+    Returns:
+        The numeric session id, as a string.
+
+    Raises:
+        RuntimeError: If no session id is found in the page.
+        requests.RequestException: On timeout, connection failure or an HTTP
+            error status from either request.
+    """
+    resp = session.get(
+        f"{BASE_URL}/bwysched.p_select_term?wsea_code=EXT",
+        timeout=30,  # requests waits indefinitely unless a timeout is given
+    )
+    resp.raise_for_status()
+    sid = re.search(r'session_id" value="(\d+)"', resp.text)
+    if not sid:
+        raise RuntimeError("Could not get session_id")
+    session_id = sid.group(1)
+    # Post the term selection to advance the session; only the status of the
+    # response matters here, its body is not used.
+    selected = session.post(
+        f"{BASE_URL}/bwysched.p_search_fields",
+        data={
+            "wsea_code": "EXT",
+            "term_code": term_code,
+            "session_id": session_id,
+        },
+        timeout=30,
+    )
+    selected.raise_for_status()
+    return session_id
+
+
+# Placeholder value for each search-form field. scrape_subject spreads this
+# into its form data ahead of the real selections.
+# NOTE(review): presumably the search handler expects a "dummy" entry per
+# field before the real values — confirm against the live form.
+DUMMY_FIELDS = {
+    "sel_aud": "dummy", "sel_subj": "dummy", "sel_camp": "dummy",
+    "sel_sess": "dummy", "sel_attr": "dummy", "sel_levl": "dummy",
+    "sel_schd": "dummy", "sel_insm": "dummy", "sel_link": "dummy",
+    "sel_wait": "dummy", "sel_day": "dummy", "sel_begin_hh": "dummy",
+    "sel_begin_mi": "dummy", "sel_begin_am_pm": "dummy",
+    "sel_end_hh": "dummy", "sel_end_mi": "dummy",
+    "sel_end_am_pm": "dummy", "sel_instruct": "dummy",
+    "sel_special": "dummy", "sel_resd": "dummy", "sel_breadth": "dummy",
+}
+
+# Real selections spread into scrape_subject's form data after DUMMY_FIELDS:
+# six day codes (a list, i.e. one form entry per day), the begin/end time
+# fields, and level "UG".
+DAY_FIELDS = {
+    "sel_day": ["m", "t", "w", "r", "f", "s"],
+    "sel_begin_hh": "0", "sel_begin_mi": "0", "sel_begin_am_pm": "a",
+    "sel_end_hh": "23", "sel_end_mi": "59", "sel_end_am_pm": "p",
+    "sel_levl": "UG",
+}
+
+
+def parse_instructors(html: str, subj: str, term_label: str) -> list[tuple]:
+    """
+    Extract (course_code, term, instructor_name) tuples from a results page.
+
+    The page is cut at every ``<tr`` and each piece is inspected on its own.
+    A piece counts only if it names a ``subj`` course and has a Lecture or
+    Seminar cell, so tutorial sections do not produce duplicate entries.
+    The instructor is taken from the last plain-text cell of the piece.
+    Tuples are returned in first-seen order, each one once.
+    """
+    course_pat = re.compile(rf'{re.escape(subj)}\s+(\d{{4}}[A-Z]?)</font>')
+    section_pat = re.compile(r'<td[^>]*>(Lecture|Seminar)</td>')
+    # A cell whose content is plain text (no nested tag) starting with a letter.
+    text_cell_pat = re.compile(r'<td[^>]*>\s*([A-Za-z][^<\n]{2,50}?)\s*</td>')
+    # Cell texts that can land in last position without being a person's name.
+    not_a_name = (
+        'tba', 'staff', 'lecture', 'tutorial', 'lab', 'seminar', 'yes', 'no', 'open', 'closed'
+    )
+
+    # Dict keys serve as an insertion-ordered set of result tuples.
+    found: dict = {}
+    for chunk in re.split(r'<tr\b', html):
+        number = course_pat.search(chunk)
+        if number is None or section_pat.search(chunk) is None:
+            continue
+        cells = text_cell_pat.findall(chunk)
+        if not cells:
+            continue
+        who = cells[-1].strip()
+        if not who or '&nbsp;' in who or who.lower() in not_a_name:
+            continue
+        found.setdefault((f"{subj} {number.group(1)}", term_label, who), None)
+    return list(found)
+
+
+def _build_search_payload(term_code: str, session_id: str, subj: str) -> list[tuple]:
+    """Assemble the course-search form as ordered (field, value) pairs.
+
+    Pairs are used instead of a dict because field names repeat: each field
+    is sent once with its DUMMY_FIELDS placeholder and again with its real
+    value, and ``sel_day`` carries one entry per day. A dict would keep only
+    the last value for each name.
+    """
+    payload = [
+        ("wsea_code", "EXT"),
+        ("term_code", term_code),
+        ("session_id", session_id),
+        ("ws_numb", ""),
+        *DUMMY_FIELDS.items(),
+        ("sel_subj", subj),
+        ("sel_number", ""),
+    ]
+    for field, value in DAY_FIELDS.items():
+        # List-valued fields expand into one pair per element.
+        values = value if isinstance(value, list) else [value]
+        payload.extend((field, item) for item in values)
+    return payload
+
+
+def scrape_subject(session: requests.Session, term_code: str, term_label: str,
+                   session_id: str, subj: str) -> list[tuple]:
+    """Run the course search for one subject and parse the instructors.
+
+    Args:
+        session: HTTP session used to send the search.
+        term_code: Term code submitted with the search.
+        term_label: Term label stored in each returned tuple.
+        session_id: Session id submitted with the search.
+        subj: Subject code to search for.
+
+    Returns:
+        The (course_code, term, instructor_name) tuples that
+        ``parse_instructors`` extracts from the response.
+
+    Raises:
+        requests.RequestException: On timeout, connection failure or an HTTP
+            error status.
+    """
+    resp = session.post(
+        f"{BASE_URL}/bwysched.p_course_search",
+        data=_build_search_payload(term_code, session_id, subj),
+        timeout=30,  # requests waits indefinitely unless a timeout is given
+    )
+    resp.raise_for_status()
+    return parse_instructors(resp.text, subj, term_label)
+
+
+def create_table(conn):
+    """Create the course_instructors table if it is missing, then commit.
+
+    The table holds one row per (course_code, term, instructor_name), with a
+    uniqueness constraint across those three columns.
+
+    Args:
+        conn: Open database connection; it is committed but not closed here.
+    """
+    ddl = """
+            CREATE TABLE IF NOT EXISTS course_instructors (
+                id SERIAL PRIMARY KEY,
+                course_code TEXT NOT NULL,
+                term TEXT NOT NULL,
+                instructor_name TEXT NOT NULL,
+                UNIQUE(course_code, term, instructor_name)
+            )
+        """
+    with conn.cursor() as cursor:
+        cursor.execute(ddl)
+    conn.commit()
+
+
+def upsert_rows(conn, rows: list[tuple]):
+    """Insert instructor rows in one statement, leaving existing rows alone.
+
+    Rows that collide with the (course_code, term, instructor_name)
+    constraint are skipped rather than updated. An empty ``rows`` does
+    nothing and does not touch the connection.
+
+    Args:
+        conn: Open database connection; committed after the insert.
+        rows: (course_code, term, instructor_name) tuples to store.
+    """
+    if rows:
+        insert_sql = """
+            INSERT INTO course_instructors (course_code, term, instructor_name)
+            VALUES %s
+            ON CONFLICT (course_code, term, instructor_name) DO NOTHING
+        """
+        with conn.cursor() as cursor:
+            execute_values(cursor, insert_sql, rows)
+        conn.commit()
+
+
+def main():
+    """Scrape every term in TERMS and store the instructors found.
+
+    For each term a search session is opened, the subject list is read, and
+    each subject is scraped and written to course_instructors. A failure on
+    one subject is printed and the run moves on to the next; a failure while
+    opening a term's session or reading its subject list ends the run.
+    The database connection and HTTP session are closed in every case.
+    """
+    conn = get_connection()
+    http = requests.Session()
+    http.headers.update({"User-Agent": "CarletonCourseMap/1.0 (student project)"})
+
+    total = 0
+    try:
+        create_table(conn)
+        for term_code, term_label in TERMS:
+            print(f"\n=== {term_label} ===")
+            session_id = get_session_id(http, term_code)
+            subjects = get_subject_codes(http, term_code, session_id)
+            print(f"Found {len(subjects)} subjects")
+
+            for subj in subjects:
+                try:
+                    rows = scrape_subject(http, term_code, term_label, session_id, subj)
+                    if rows:
+                        upsert_rows(conn, rows)
+                        print(f"  {subj}: {len(rows)} sections")
+                        # Counts rows handed to the insert; rows already
+                        # present are skipped by ON CONFLICT but still counted.
+                        total += len(rows)
+                except Exception as e:
+                    # A failed statement leaves the transaction aborted, which
+                    # would make every later insert fail too; roll back first.
+                    conn.rollback()
+                    print(f"  {subj}: ERROR {e}")
+                finally:
+                    time.sleep(0.4)  # be polite, including after a failure
+    finally:
+        http.close()
+        conn.close()
+    print(f"\nDone. {total} total rows inserted.")
+
+
+# Run the scraper only when this file is executed as a script, not on import.
+if __name__ == "__main__":
+    main()
diff --git a/frontend/app/api/rmp/route.js b/frontend/app/api/rmp/route.js
@@ -0,0 +1,46 @@
+import { RMPClient } from 'ratemyprofessors-client'
+import { NextResponse } from 'next/server'
+
+// School id passed to every professor search and compared against each
+// result's school id.
+const CARLETON_SCHOOL_ID = '1420'
+// One client instance, created at module load and shared by all requests.
+const client = new RMPClient()
+
+// Cache results in memory for the lifetime of the server process.
+// Maps a requested name to the JSON payload that was returned for it.
+const cache = new Map()
+
+// Upper bound on cached lookups. `name` comes from the caller, so without a
+// cap the cache could grow for as long as the process lives.
+const MAX_CACHE_ENTRIES = 1000
+
+/**
+ * Look up a professor's rating summary by name.
+ *
+ * Reads the `name` query parameter, searches with CARLETON_SCHOOL_ID and
+ * uses the first result whose school id matches it.
+ *
+ * Responses:
+ *   - 400 `{ error }` when `name` is missing or blank.
+ *   - 200 `{ found: true, ... }` with the rating fields when a match exists.
+ *   - 200 `{ found: false }` when nothing matches or the lookup throws.
+ *
+ * Successful lookups, including "not found", are cached by name; failed
+ * lookups are not, so they are retried on the next request.
+ *
+ * @param {Request} request Incoming request.
+ * @returns {Promise<Response>} JSON response.
+ */
+export async function GET(request) {
+  const { searchParams } = new URL(request.url)
+  const name = searchParams.get('name')?.trim()
+
+  if (!name) {
+    return NextResponse.json({ error: 'name is required' }, { status: 400 })
+  }
+
+  if (cache.has(name)) {
+    return NextResponse.json(cache.get(name))
+  }
+
+  try {
+    const result = await client.searchProfessors(name, CARLETON_SCHOOL_ID)
+    // Pick the first result that belongs to Carleton
+    const prof = result.professors?.find(p => p.school?.id === CARLETON_SCHOOL_ID) ?? null
+
+    // `null >= 0` is true in JavaScript, so a missing value has to be ruled
+    // out explicitly or it would be reported as 0.
+    const takeAgain = prof?.percent_take_again
+    const hasTakeAgain = takeAgain != null && takeAgain >= 0
+
+    const data = prof
+      ? {
+          found: true,
+          name: prof.name,
+          department: prof.department,
+          overall_rating: prof.overall_rating,
+          difficulty: prof.level_of_difficulty,
+          num_ratings: prof.num_ratings,
+          would_take_again: hasTakeAgain ? Math.round(takeAgain) : null,
+          rmp_url: `https://www.ratemyprofessors.com/professor/${prof.id}`,
+        }
+      : { found: false }
+
+    if (cache.size >= MAX_CACHE_ENTRIES) {
+      // A Map iterates in insertion order, so the first key is the oldest.
+      cache.delete(cache.keys().next().value)
+    }
+    cache.set(name, data)
+    return NextResponse.json(data)
+  } catch (err) {
+    console.error('RMP lookup failed for', name, err)
+    return NextResponse.json({ found: false })
+  }
+}