From 04657ef92dc60ee6eca70658ce55dac48a788500 Mon Sep 17 00:00:00 2001 From: Helder Mendes Date: Thu, 12 Mar 2026 21:19:41 +0100 Subject: [PATCH 1/2] chore: documentation updates, seo files, and cleanup --- app-next/docs/AUTH_ARCHITECTURE.md | 291 ++++++++++++++++++++++++++++ app-next/docs/TESTING_GUIDE.md | 1 + app-next/docs/flask-vs-migration.md | 36 ++-- app-next/src/app/robots.ts | 42 ++++ app-next/src/app/sitemap.ts | 208 ++++++++++++++++++++ 5 files changed, 560 insertions(+), 18 deletions(-) create mode 100644 app-next/docs/AUTH_ARCHITECTURE.md create mode 100644 app-next/docs/TESTING_GUIDE.md create mode 100644 app-next/src/app/robots.ts create mode 100644 app-next/src/app/sitemap.ts diff --git a/app-next/docs/AUTH_ARCHITECTURE.md b/app-next/docs/AUTH_ARCHITECTURE.md new file mode 100644 index 00000000..a406b4b1 --- /dev/null +++ b/app-next/docs/AUTH_ARCHITECTURE.md @@ -0,0 +1,291 @@ +# Auth Architecture — Old vs New + +Last updated: 2026-03-06 + +--- + +## Why This Document Exists + +The OpenML frontend is being migrated from a legacy React SPA (backed by a Flask proxy) to a +modern Next.js application. One of the most significant architectural changes is in +**authentication and authorization**: we removed Flask as a middleman and replaced it with a +direct, modern auth layer built into Next.js. 
+ +This document explains: + +- What the old architecture looked like and why it had limitations +- What the new architecture does and why it is better +- What the **backend team must do** before deploying to production or test.openml.org +- What is still Flask-dependent and what has been fully migrated + +> For detailed implementation docs see: +> +> - [AUTHENTICATION.md](./AUTHENTICATION.md) — NextAuth.js flows, tables, env vars +> - [ENVIRONMENTS.md](./ENVIRONMENTS.md) — localhost vs test vs production differences +> - [flask-vs-migration.md](./flask-vs-migration.md) — full feature migration status + +--- + +## Before: Old React + Flask Architecture + +``` +┌─────────────────────────────────────────────────────────────┐ +│ Browser (React SPA) │ +└─────────────────────────┬───────────────────────────────────┘ + │ All requests + ▼ +┌─────────────────────────────────────────────────────────────┐ +│ Flask Backend (Python) │ +│ ├── Auth: login, register, session management │ +│ ├── API key: read/write session_hash in MySQL │ +│ ├── Proxy: forwards data requests to OpenML REST API │ +│ └── Profile: read/write user data │ +└──────────┬──────────────────────────┬───────────────────────┘ + │ │ + ▼ ▼ + MySQL Database OpenML REST API + (users, sessions) (Java/PHP backend) + (datasets, tasks, runs) +``` + +**Problems with this architecture:** + +- Every browser request went through Flask, even for read-only data +- Flask was a proxy bottleneck — two network hops for most operations +- Authentication was tightly coupled to Flask's session management +- Modern features (OAuth, passkeys) would require Flask changes +- No type safety, no built-in CSRF protection + +--- + +## After: New Next.js Architecture + +``` +┌─────────────────────────────────────────────────────────────┐ +│ Browser (Next.js — Server + Client) │ +│ │ +│ ┌─────────────────────────────────────────────────────┐ │ +│ │ NextAuth.js (/api/auth/...) 
│ │ +│ │ ├── Email/password (Argon2i, same hash as Flask) │ │ +│ │ ├── OAuth: GitHub, Google │ │ +│ │ ├── Passkeys (WebAuthn / biometrics) │ │ +│ │ └── Session: JWT in HTTP-only cookie │ │ +│ └──────────────────────┬──────────────────────────────┘ │ +│ │ DIRECT │ +└──────────────────────────┼──────────────────────────────────┘ + │ │ + ▼ ▼ + MySQL Database OpenML REST API + (auth only) (datasets, tasks, + DIRECT connection runs, uploads) + no Flask proxy DIRECT — no Flask +``` + +**What changed:** + +- Next.js connects **directly** to MySQL for authentication — no Flask in between +- Data requests go **directly** to the OpenML REST API — no Flask proxy +- Flask is now only used for three small features (see below) +- Modern auth methods (OAuth, passkeys) are handled natively by NextAuth.js + +--- + +## What Is Still Flask-Dependent + +| Feature | Status | Notes | +| ------------------------ | ---------------- | ------------------------------------------------------------------ | +| **API key regeneration** | Still uses Flask | Can be migrated to direct MySQL (easy) | +| **API key fetch** | Still uses Flask | Can be migrated to direct MySQL (easy) | +| **Dataset stats** | Still uses Flask | Stats endpoint proxies to `FLASK_BACKEND_URL` | +| All other auth | ✅ Migrated | NextAuth.js + direct MySQL | +| All data reads | ✅ No Flask | Elasticsearch + OpenML REST API | +| Uploads / likes | ✅ No Flask | Direct to OpenML REST API with user's `session_hash` | + +> See [flask-vs-migration.md](./flask-vs-migration.md) for the complete feature list. + +--- + +## The API Key — How It Flows + +The `session_hash` column in the `users` table is the OpenML API key. It is the same value +that users copy from their OpenML profile page and paste into Python/R clients. 
+ +``` +MySQL users.session_hash + │ + ▼ (loaded at login by NextAuth) +NextAuth session (server-side JWT, HTTP-only cookie) + │ + ▼ (passed to API routes on the server) +/api/datasets/upload → POST to OpenML REST API with api_key=session_hash +/api/datasets/[id]/edit → POST to OpenML REST API with api_key=session_hash +/api/tasks/create → POST to OpenML REST API with api_key=session_hash +/api/collections/create → POST to OpenML REST API with api_key=session_hash +``` + +**Critical:** API keys are NOT shared between servers. + +- A `session_hash` from `openml.org` is rejected by `test.openml.org` and vice versa. +- Local accounts (Docker MySQL) have no `session_hash` recognized by either server. + +--- + +## Backend Team: Required Actions + +### For Production (`openml.org`) + +**One-time setup — run these SQL statements on the production MySQL database:** + +```sql +-- 1. Email confirmation tokens (required for new user registration) +CREATE TABLE IF NOT EXISTS email_confirmation_token ( + id INT AUTO_INCREMENT PRIMARY KEY, + user_id INT NOT NULL, + token VARCHAR(255) NOT NULL UNIQUE, + expires_at DATETIME NOT NULL, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + FOREIGN KEY (user_id) REFERENCES users(id) ON DELETE CASCADE, + INDEX idx_token (token), + INDEX idx_user_id (user_id), + INDEX idx_expires (expires_at) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; + +-- 2. Password reset tokens (required for forgot-password flow) +CREATE TABLE IF NOT EXISTS password_reset_token ( + id INT AUTO_INCREMENT PRIMARY KEY, + user_id INT NOT NULL, + token VARCHAR(255) NOT NULL UNIQUE, + expires_at DATETIME NOT NULL, + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + used BOOLEAN DEFAULT FALSE, + used_at DATETIME, + FOREIGN KEY (user_id) REFERENCES users(id) ON DELETE CASCADE, + INDEX idx_token (token), + INDEX idx_user_id (user_id), + INDEX idx_expires (expires_at) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; + +-- 3. 
Passkey credentials (required for WebAuthn biometric login) +CREATE TABLE IF NOT EXISTS user_passkeys ( + id INT AUTO_INCREMENT PRIMARY KEY, + user_id INT NOT NULL, + credential_id BLOB NOT NULL, + public_key BLOB NOT NULL, + sign_count INT DEFAULT 0, + transports VARCHAR(255), + device_name VARCHAR(255), + created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + last_used_at TIMESTAMP NULL, + FOREIGN KEY (user_id) REFERENCES users(id) ON DELETE CASCADE, + INDEX idx_user_id (user_id) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci; +``` + +> These are **additive only** — nothing existing is modified or deleted. + +**What breaks without each table:** + +| Missing table | What breaks | What still works | +| -------------------------- | ------------------------------------------- | ---------------------- | +| `email_confirmation_token` | New user registration (stuck at `active=0`) | Existing user sign-in | +| `password_reset_token` | Forgot password / reset password | Everything else | +| `user_passkeys` | Passkey sign-up and sign-in | All other auth methods | + +**Environment variables required on the production server:** + +```env +# Next.js +NEXTAUTH_SECRET= # Required: long random string, e.g. output of `openssl rand -base64 32` +NEXTAUTH_URL=https://openml.org # Must match the exact public URL + +# Database — direct MySQL connection (replaces Flask DB proxy) +DATABASE_URL=mysql://user:pass@mysql-host:3306/openml + +# Email — for registration confirmation and password reset +SMTP_HOST=your.smtp.server +SMTP_PORT=587 +SMTP_SECURE=false +SMTP_USER=your@email.com +SMTP_PASS=yourpassword +SMTP_FROM="OpenML <your@email.com>" + +# OAuth (if enabled) +GITHUB_ID=... +GITHUB_SECRET=... +GOOGLE_ID=... +GOOGLE_SECRET=... 
+ +# Passkeys +RP_ID=openml.org +RP_ORIGIN=https://openml.org + +# OpenML REST API +NEXT_PUBLIC_OPENML_API_URL=https://www.openml.org + +# Flask backend — still required for API key fetch/regeneration and dataset stats +FLASK_BACKEND_URL=https://www.openml.org +``` + +--- + +### For Test Server (`test.openml.org`) + +The test server needs the **exact same 3 tables** and the same environment variables, +but pointing to the test database and test domain. + +**Key differences from production:** + +```env +NEXTAUTH_URL=https://test.openml.org # ← test domain +RP_ID=test.openml.org # ← test domain for passkeys +RP_ORIGIN=https://test.openml.org # ← test domain for passkeys +NEXT_PUBLIC_OPENML_API_URL=https://test.openml.org # ← test API +FLASK_BACKEND_URL=https://test.openml.org # ← test Flask backend +DATABASE_URL=mysql://user:pass@test-mysql-host:3306/openml_test +``` + +**Important:** test.openml.org has a completely separate MySQL database. + +- Users registered on `openml.org` do NOT exist on `test.openml.org` +- API keys from `openml.org` are NOT valid on `test.openml.org` +- The 3 new tables must be created separately on each database + +--- + +### For Local Development (Backend Team Docker Stack) + +> **Action required:** When setting up the local Docker stack (full backend environment, like `openml/openml.org`), add the **same 3 SQL statements** from the "For Production" section above to your MySQL init script (e.g. `docker-entrypoint-initdb.d/init.sql`). Without them, registration, password reset, and passkey login will not work locally. 
+ +--- + +## Feature Impact Summary + +| User action | Old (React + Flask) | New (Next.js) | +| --------------------------- | --------------------------- | ------------------------------ | +| Sign in (email/password) | Flask session | NextAuth.js → MySQL direct | +| Register | Flask → MySQL | NextAuth.js → MySQL direct | +| OAuth login (GitHub/Google) | Not supported | NextAuth.js | +| Passkey login | Not supported | NextAuth.js + WebAuthn | +| Forgot password | Flask | Next.js → MySQL direct | +| Upload dataset | Flask proxy → OpenML API | Next.js → OpenML API direct | +| Like / Unlike | Flask proxy → OpenML API | Next.js → OpenML API direct | +| Search / Browse | Direct → Elasticsearch | Next.js → Elasticsearch direct | +| API key regeneration | Flask | Still Flask (to be migrated) | + +--- + +## For New Frontend Developers + +**What you need to know:** + +1. **Auth is NextAuth.js** — use `useSession()` on the client, `getServerSession()` on the server. +2. **API key is `session.apikey`** — this is `users.session_hash` from MySQL, loaded at login. It must be sent to the OpenML REST API for any write operation (upload, edit, like). +3. **Never call Flask directly** for auth — it is not in the auth path anymore. +4. **DATABASE_URL is mandatory** — the app will not start without it. +5. **Two separate data sources:** + - MySQL (`DATABASE_URL`) → auth only (users, sessions, passkeys, tokens) + - OpenML REST API (`NEXT_PUBLIC_OPENML_API_URL`) → all content (datasets, tasks, runs) +6. **Environments are isolated** — API keys and user accounts are separate between `openml.org`, `test.openml.org`, and `localhost`. + +> See [ENVIRONMENTS.md](./ENVIRONMENTS.md) for the full environment reference. +> See [AUTHENTICATION.md](./AUTHENTICATION.md) for NextAuth.js implementation details. diff --git a/app-next/docs/TESTING_GUIDE.md b/app-next/docs/TESTING_GUIDE.md new file mode 100644 index 00000000..74373b2b --- /dev/null +++ b/app-next/docs/TESTING_GUIDE.md @@ -0,0 +1 @@ +1. 
Backward Compatibility diff --git a/app-next/docs/flask-vs-migration.md b/app-next/docs/flask-vs-migration.md index 27d36465..7d7d746b 100644 --- a/app-next/docs/flask-vs-migration.md +++ b/app-next/docs/flask-vs-migration.md @@ -2,19 +2,19 @@ ### ✅ Already Migrated (No Flask Needed) -| Feature | Implementation | Env Vars Needed | -| ---------------------- | ------------------------------- | ---------------------------------------- | -| Login (email/password) | Direct DB (Argon2i verification)| `DATABASE_URL` | -| Registration | Direct DB insert | `DATABASE_URL` + `SMTP_*` | -| Email confirmation | Direct DB (token-based) | `DATABASE_URL` | -| Forgot password | Direct DB + email | `DATABASE_URL` + `SMTP_*` | -| Reset password | Direct DB (Argon2i hashing) | `DATABASE_URL` | -| OAuth (Google/GitHub) | NextAuth + Direct DB | `DATABASE_URL` + `GITHUB_*` + `GOOGLE_*` | -| Passkey signup/login | Direct DB (WebAuthn) | `DATABASE_URL` + `RP_ID` + `RP_ORIGIN` | -| Profile updates | Direct DB | `DATABASE_URL` | -| Avatar upload | Direct filesystem/Vercel Blob | `BLOB_READ_WRITE_TOKEN` (Vercel only) | -| Search/Data | Elasticsearch + OpenML REST API | ` | -| User profiles | Elasticsearch | ` | +| Feature | Implementation | Env Vars Needed | +| ---------------------- | -------------------------------- | ---------------------------------------- | +| Login (email/password) | Direct DB (Argon2i verification) | `DATABASE_URL` | +| Registration | Direct DB insert | `DATABASE_URL` + `SMTP_*` | +| Email confirmation | Direct DB (token-based) | `DATABASE_URL` | +| Forgot password | Direct DB + email | `DATABASE_URL` + `SMTP_*` | +| Reset password | Direct DB (Argon2i hashing) | `DATABASE_URL` | +| OAuth (Google/GitHub) | NextAuth + Direct DB | `DATABASE_URL` + `GITHUB_*` + `GOOGLE_*` | +| Passkey signup/login | Direct DB (WebAuthn) | `DATABASE_URL` + `RP_ID` + `RP_ORIGIN` | +| Profile updates | Direct DB | `DATABASE_URL` | +| Avatar upload | Direct filesystem/Vercel Blob | 
`BLOB_READ_WRITE_TOKEN` (Vercel only) | +| Search/Data | Elasticsearch + OpenML REST API | `NEXT_PUBLIC_URL_ELASTICSEARCH` | +| User profiles | Elasticsearch | `NEXT_PUBLIC_URL_ELASTICSEARCH` | ### 🔧 Still Uses Flask (Could Be Migrated) @@ -25,11 +25,11 @@ ### ❌ Not Migratable (Requires OpenML REST API) -| Feature | Reason | Env Vars Needed | -| ------------------- | ----------------------------------- | ------------------------------------ | -| **Likes** | Must call OpenML REST API | `DATABASE_URL` (for valid API key) | -| **Data uploads** | OpenML REST API handles processing | `DATABASE_URL` (for valid API key) | -| **Run submissions** | OpenML REST API handles ML pipeline | `DATABASE_URL` (for valid API key) | +| Feature | Reason | Env Vars Needed | +| ------------------- | ----------------------------------- | ---------------------------------- | +| **Likes** | Must call OpenML REST API | `DATABASE_URL` (for valid API key) | +| **Data uploads** | OpenML REST API handles processing | `DATABASE_URL` (for valid API key) | +| **Run submissions** | OpenML REST API handles ML pipeline | `DATABASE_URL` (for valid API key) | --- diff --git a/app-next/src/app/robots.ts b/app-next/src/app/robots.ts new file mode 100644 index 00000000..2dfdf99a --- /dev/null +++ b/app-next/src/app/robots.ts @@ -0,0 +1,42 @@ +import { MetadataRoute } from "next"; + +const SITE_URL = process.env.NEXT_PUBLIC_URL || "https://www.openml.org"; + +export default function robots(): MetadataRoute.Robots { + return { + rules: [ + { + userAgent: "*", + allow: "/", + disallow: [ + // Auth & user-specific pages + "/auth/", + "/nl/auth/", + "/fr/auth/", + "/de/auth/", + "/dashboard/", + "/nl/dashboard/", + "/fr/dashboard/", + "/de/dashboard/", + // Create/edit forms (not useful for indexing) + "/datasets/upload", + "/nl/datasets/upload", + "/fr/datasets/upload", + "/de/datasets/upload", + "/tasks/create", + "/nl/tasks/create", + "/fr/tasks/create", + "/de/tasks/create", + "/collections/create", + 
"/nl/collections/create", + "/fr/collections/create", + "/de/collections/create", + "*/edit", + // API routes + "/api/", + ], + }, + ], + sitemap: `${SITE_URL}/sitemap.xml`, + }; +} diff --git a/app-next/src/app/sitemap.ts b/app-next/src/app/sitemap.ts new file mode 100644 index 00000000..db27264a --- /dev/null +++ b/app-next/src/app/sitemap.ts @@ -0,0 +1,208 @@ +import { MetadataRoute } from "next"; +import { getElasticsearchUrl } from "@/lib/elasticsearch"; + +const SITE_URL = + process.env.NEXT_PUBLIC_URL || "https://www.openml.org"; + +const LOCALES = ["nl", "fr", "de"] as const; + +// Static pages (English canonical + locale alternates) +const STATIC_PAGES = [ + { path: "/", changeFrequency: "weekly", priority: 1.0 }, + { path: "/datasets", changeFrequency: "daily", priority: 0.9 }, + { path: "/tasks", changeFrequency: "daily", priority: 0.9 }, + { path: "/flows", changeFrequency: "daily", priority: 0.9 }, + { path: "/runs", changeFrequency: "daily", priority: 0.8 }, + { path: "/benchmarks/tasks", changeFrequency: "weekly", priority: 0.8 }, + { path: "/benchmarks/runs", changeFrequency: "weekly", priority: 0.8 }, + { path: "/collections/tasks", changeFrequency: "weekly", priority: 0.7 }, + { path: "/collections/runs", changeFrequency: "weekly", priority: 0.7 }, + { path: "/about", changeFrequency: "monthly", priority: 0.6 }, + { path: "/meet-us", changeFrequency: "monthly", priority: 0.5 }, + { path: "/team", changeFrequency: "monthly", priority: 0.5 }, + { path: "/documentation", changeFrequency: "monthly", priority: 0.7 }, + { path: "/apis", changeFrequency: "monthly", priority: 0.6 }, + { path: "/terms", changeFrequency: "monthly", priority: 0.4 }, + { path: "/contribute", changeFrequency: "monthly", priority: 0.5 }, +] as const; + +// ES entity config: index, URL prefix, active filter field +const ENTITY_CONFIGS = [ + { + index: "data", + prefix: "/datasets", + idField: "did", + filter: { term: { status: "active" } }, + changeFrequency: "monthly" as const, 
+ priority: 0.8, + }, + { + index: "task", + prefix: "/tasks", + idField: "task_id", + filter: { term: { status: "active" } }, + changeFrequency: "monthly" as const, + priority: 0.7, + }, + { + index: "flow", + prefix: "/flows", + idField: "id", + filter: null, + changeFrequency: "monthly" as const, + priority: 0.6, + }, +] as const; + +const PAGE_SIZE = 5000; + +type SitemapEntry = { + index: number; + type: "static" | "data" | "task" | "flow"; +}; + +export async function generateSitemaps(): Promise<{ id: number }[]> { + // Sitemap 0 is always static pages. + // Then one sitemap per PAGE_SIZE block per entity type. + const ids: { id: number }[] = [{ id: 0 }]; + + let offset = 1; + for (const config of ENTITY_CONFIGS) { + try { + const res = await fetch(getElasticsearchUrl(`${config.index}/_count`), { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify( + config.filter ? { query: config.filter } : { query: { match_all: {} } } + ), + next: { revalidate: 3600 }, + }); + if (res.ok) { + const data = await res.json(); + const count: number = data.count ?? 
0; + const pages = Math.ceil(count / PAGE_SIZE); + for (let i = 0; i < pages; i++) { + ids.push({ id: offset + i }); + } + offset += Math.max(pages, 1); + } + } catch { + // If ES is unavailable, still include at least one sitemap block + ids.push({ id: offset }); + offset++; + } + } + + return ids; +} + +export default async function sitemap({ + id, +}: { + id: number; +}): Promise<MetadataRoute.Sitemap> { + // Sitemap 0: static pages with locale alternates + if (id === 0) { + const now = new Date(); + return STATIC_PAGES.flatMap(({ path, changeFrequency, priority }) => { + const canonical = `${SITE_URL}${path}`; + const localeAlternates = Object.fromEntries([ + ["x-default", canonical], + ...LOCALES.map((locale) => [ + locale, + `${SITE_URL}/${locale}${path}`, + ]), + ]); + return [ + { + url: canonical, + lastModified: now, + changeFrequency, + priority, + alternates: { languages: localeAlternates }, + }, + ]; + }); + } + + // Dynamic entity sitemaps — figure out which config and page this id maps to + let offset = 1; + for (const config of ENTITY_CONFIGS) { + let count = 0; + try { + const countRes = await fetch( + getElasticsearchUrl(`${config.index}/_count`), + { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify( + config.filter ? { query: config.filter } : { query: { match_all: {} } } + ), + next: { revalidate: 3600 }, + } + ); + if (countRes.ok) { + const countData = await countRes.json(); + count = countData.count ?? 0; + } + } catch { + // fallback + } + + const pages = Math.max(Math.ceil(count / PAGE_SIZE), 1); + if (id < offset + pages) { + // This sitemap id belongs to this entity type + const page = id - offset; + return fetchEntitySitemap(config, page); + } + offset += pages; + } + + return []; +} + +async function fetchEntitySitemap( + config: (typeof ENTITY_CONFIGS)[number], + page: number +): Promise<MetadataRoute.Sitemap> { + try { + const query = config.filter + ? 
{ query: config.filter } + : { query: { match_all: {} } }; + + const res = await fetch(getElasticsearchUrl(`${config.index}/_search`), { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + ...query, + _source: [config.idField, "date"], + sort: [{ date: { order: "desc" } }], + from: page * PAGE_SIZE, + size: PAGE_SIZE, + }), + next: { revalidate: 3600 }, + }); + + if (!res.ok) return []; + + const data = await res.json(); + const hits: Array<{ _source: Record<string, unknown> }> = + data.hits?.hits ?? []; + + return hits + .map((hit) => { + const entityId = hit._source[config.idField]; + if (!entityId) return null; + const dateStr = hit._source["date"] as string | undefined; + return { + url: `${SITE_URL}${config.prefix}/${entityId}`, + lastModified: dateStr ? new Date(dateStr) : new Date(), + changeFrequency: config.changeFrequency, + priority: config.priority, + }; + }) + .filter((entry): entry is NonNullable<typeof entry> => entry !== null); + } catch { + return []; + } +} From 20478527acb0cc4887cdaa76c7fe1a392c9dc8a4 Mon Sep 17 00:00:00 2001 From: Helder Mendes Date: Fri, 13 Mar 2026 16:20:16 +0100 Subject: [PATCH 2/2] chore: remove outdated roadmap, deployment doc and dev-only db scripts --- app-next/DEPLOYMENT.md | 238 ------------------------------------- app-next/TEAM_ROADMAP.md | 208 -------------------------------- app-next/check_groups.js | 17 --- app-next/check_passkeys.js | 23 ---- app-next/check_users.js | 17 --- 5 files changed, 503 deletions(-) delete mode 100644 app-next/DEPLOYMENT.md delete mode 100644 app-next/TEAM_ROADMAP.md delete mode 100644 app-next/check_groups.js delete mode 100644 app-next/check_passkeys.js delete mode 100644 app-next/check_users.js diff --git a/app-next/DEPLOYMENT.md b/app-next/DEPLOYMENT.md deleted file mode 100644 index ff5b13a1..00000000 --- a/app-next/DEPLOYMENT.md +++ /dev/null @@ -1,238 +0,0 @@ -# Deploying app-next to Vercel - -## Quick Start - -### 1. 
Push to GitHub - -```bash -git add . 
-git commit -m "Prepare for Vercel deployment" -git push origin main -``` - -### 2. Import to Vercel - -1. Go to [vercel.com](https://vercel.com) -2. Click **Add New** → **Project** -3. Import your `openml.org` repository -4. Select the **app-next** directory as the root directory -5. Framework Preset: **Next.js** (auto-detected) -6. Click **Deploy** - -### 3. Configure Environment Variables - -After deployment, go to **Project Settings** → **Environment Variables** and add: - -#### Minimal Configuration (Using Public OpenML API) - -```bash -# API Configuration -NEXT_PUBLIC_OPENML_URL=https://www.openml.org -NEXT_PUBLIC_URL_API=https://www.openml.org/api/v1 - -# Elasticsearch -NEXT_PUBLIC_ELASTICSEARCH_SERVER=https://www.openml.org/es - -# MinIO (File Storage) -NEXT_PUBLIC_URL_MINIO=https://www.openml.org/data - -# Node Environment -NODE_ENV=production -NEXT_TELEMETRY_DISABLED=1 -``` - -For each variable: - -- Click **Add New** -- Enter **Name** and **Value** -- Select: **Production**, **Preview**, **Development** -- Click **Save** - -### 4. Redeploy - -After adding environment variables: - -1. Go to **Deployments** tab -2. Click **⋯** on latest deployment -3. Click **Redeploy** - -Your app will be live at `https://your-project.vercel.app` - ---- - -## Optional Configurations - -### Analytics (Recommended) - -#### Vercel Analytics - -Automatically enabled on Vercel. No configuration needed. - -#### Google Analytics - -Add to environment variables: - -```bash -NEXT_PUBLIC_GA_ID=G-XXXXXXXXXX -``` - -### OAuth Login (GitHub) - -1. Create GitHub OAuth App: - - Go to: https://github.com/settings/developers - - Click **New OAuth App** - - **Application name**: OpenML - - **Homepage URL**: https://your-project.vercel.app - - **Authorization callback URL**: https://your-project.vercel.app/api/auth/github/callback - - Click **Register application** - - Copy **Client ID** and generate **Client Secret** - -2. 
Add to Vercel environment variables: - -```bash -NEXT_PUBLIC_GITHUB_OAUTH_CLIENT_ID=your_github_client_id -GITHUB_OAUTH_CLIENT_SECRET=your_github_client_secret -``` - -### Error Tracking (Sentry) - -1. Create Sentry project at [sentry.io](https://sentry.io) -2. Get your DSN -3. Add to environment variables: - -```bash -SENTRY_DSN=https://xxx@xxx.ingest.sentry.io/xxx -NEXT_PUBLIC_SENTRY_DSN=https://xxx@xxx.ingest.sentry.io/xxx -``` - -### Custom Domain - -1. Go to **Settings** → **Domains** -2. Add your domain (e.g., `openml.org`) -3. Update DNS records as instructed by Vercel -4. Vercel automatically provisions SSL certificate - ---- - -## Advanced: Deploy Your Own Backend - -If you want to deploy the Python backend (`server/`) separately: - -### Option 1: Railway - -1. Go to [railway.app](https://railway.app) -2. Create new project from GitHub -3. Select `server/` directory -4. Add environment variables (PostgreSQL, etc.) -5. Deploy - -### Option 2: Fly.io - -```bash -cd server -fly launch -fly deploy -``` - -### Update Frontend Environment Variables - -```bash -NEXT_PUBLIC_OPENML_URL=https://your-backend.railway.app -NEXT_PUBLIC_URL_API=https://your-backend.railway.app/api/v1 -``` - ---- - -## Troubleshooting - -### Build Errors - -**Issue**: Module not found errors - -- **Solution**: Check `package.json` dependencies are installed -- Run `npm install` locally to verify - -**Issue**: Environment variable not found - -- **Solution**: Ensure variables starting with `NEXT_PUBLIC_` are set in Vercel - -### Runtime Errors - -**Issue**: API calls failing - -- **Solution**: Check CORS settings on backend -- Verify `NEXT_PUBLIC_OPENML_URL` is correct - -**Issue**: Images not loading - -- **Solution**: Verify `remotePatterns` in `next.config.ts` -- Check MinIO URL is correct - -### Performance Issues - -**Issue**: Slow page loads - -- **Solution**: Enable Vercel Analytics to identify bottlenecks -- Use dynamic imports for heavy components: - ```tsx - const HeavyComponent 
= dynamic(() => import("./HeavyComponent"), { - loading: () => , - }); - ``` - ---- - -## Monitoring - -### Vercel Dashboard - -Monitor your deployment: - -- **Analytics**: Page views, performance metrics -- **Logs**: Runtime logs and errors -- **Deployments**: Build history and status - -### Health Checks - -Create a health check endpoint: - -```tsx -// app/api/health/route.ts -export async function GET() { - return Response.json({ status: "ok", timestamp: new Date().toISOString() }); -} -``` - -Test: `https://your-project.vercel.app/api/health` - ---- - -## Security Checklist - -- [ ] Environment variables set in Vercel (not in `.env.production`) -- [ ] Secrets (API keys, OAuth secrets) are private -- [ ] CORS configured correctly on backend -- [ ] OAuth callback URLs match deployment domain -- [ ] CSP headers configured (if needed) -- [ ] Rate limiting on API routes (if applicable) - ---- - -## Next Steps - -1. ✅ Deploy to Vercel -2. ✅ Configure environment variables -3. ✅ Test all pages and functionality -4. ⬜ Add custom domain -5. ⬜ Enable analytics -6. ⬜ Set up error tracking (Sentry) -7. ⬜ Configure OAuth for login -8. 
⬜ Monitor performance and errors - ---- - -## Support - -- **Vercel Docs**: https://vercel.com/docs -- **Next.js Docs**: https://nextjs.org/docs -- **OpenML GitHub**: https://github.com/openml/openml.org diff --git a/app-next/TEAM_ROADMAP.md b/app-next/TEAM_ROADMAP.md deleted file mode 100644 index 3ef5ae6b..00000000 --- a/app-next/TEAM_ROADMAP.md +++ /dev/null @@ -1,208 +0,0 @@ -# OpenML Next.js - Deployment Roadmap - -## Strategy - -**Phase 1 (NOW)**: OpenML credentials login → Vercel + Development -**Phase 2 (LATER)**: OAuth + Sign-up → TU/e Production - ---- - -## Phase 1: OpenML Credentials Login ✅ - -### Status - -- ✅ Development working (localhost:3050) -- 🔄 Vercel ready for testing -- ⏳ TU/e backend needs CORS update - -### Required Backend Changes - -**CORS Configuration** (Flask): - -```python -CORS_ORIGINS = [ - "https://openmlorg.vercel.app", - "https://www.openml.org", - "http://localhost:3050" -] -``` - -**Endpoints** (should exist): - -- `POST /login` → Returns JWT -- `GET /profile` → Returns user data (needs JWT in Authorization header) -- `POST /image` → Avatar upload (optional) - -### Testing Checklist - -- [ ] Login at `https://openmlorg.vercel.app/auth/signin` -- [ ] Avatar/initials appear in header -- [ ] Profile page loads (`/auth/profile`) -- [ ] Session persists across navigation -- [ ] Logout works -- [ ] No CORS errors in console - ---- - -## Phase 2: OAuth & Sign-Up (Prepared) - -### Frontend Status - -✅ Code implemented, needs production config - -### Backend Requirements - -#### 1. 
GitHub OAuth - -**Create OAuth App**: - -``` -URL: https://github.com/settings/developers -Homepage: https://www.openml.org -Callback: https://www.openml.org/api/auth/callback/github -``` - -**Environment Variables**: - -```bash -GITHUB_ID= -GITHUB_SECRET= -``` - -**Backend Endpoint** (`POST /auth/oauth/github`): - -```python -# Request body: -{ - "provider": "github", - "providerId": "12345678", - "email": "user@example.com", - "name": "John Doe", - "image": "https://avatars.githubusercontent.com/..." -} - -# Response: -{ - "access_token": "jwt_token", - "id": 123, - "username": "johndoe" -} - -# Logic: -# 1. Find/create user by providerId or email -# 2. Link OAuth provider to account -# 3. Return JWT -``` - -#### 2. Google OAuth (Optional) - -**Create OAuth App**: - -``` -URL: https://console.cloud.google.com/ -Authorized origins: https://www.openml.org -Callback: https://www.openml.org/api/auth/callback/google -``` - -**Environment Variables**: - -```bash -GOOGLE_ID= -GOOGLE_SECRET= -``` - -**Backend**: Same as GitHub, endpoint `/auth/oauth/google` - -#### 3. Email Verification - -**SMTP Configuration**: - -```bash -SMTP_SERVER=smtp.sendgrid.net -SMTP_PORT=587 -SMTP_LOGIN=apikey -SMTP_PASS= -EMAIL_SENDER=noreply@openml.org -``` - -**Backend Endpoints**: - -- `POST /register` → Create inactive user, send verification email -- `GET /verify-email/` → Activate account - -#### 4. 
Database Schema - -```sql --- OAuth linking (option 1: add columns to users table) -ALTER TABLE users ADD oauth_provider VARCHAR(50); -ALTER TABLE users ADD oauth_provider_id VARCHAR(255); -ALTER TABLE users ADD email_verified BOOLEAN DEFAULT FALSE; - --- OAuth linking (option 2: separate table) -CREATE TABLE oauth_providers ( - id SERIAL PRIMARY KEY, - user_id INT REFERENCES users(id), - provider VARCHAR(50), - provider_id VARCHAR(255), - created_at TIMESTAMP DEFAULT NOW() -); -``` - ---- - -## Security Checklist (Production) - -```python -# Flask -JWT_SECRET_KEY = -APP_SECRET_KEY = -SESSION_COOKIE_SECURE = True -CSRF_COOKIE_SECURE = True - -# CORS (production only) -CORS_ORIGINS = ["https://www.openml.org", "https://openml.org"] - -# Database -DATABASE_URI = "postgresql://..." # SSL enabled -``` - ---- - -## Current Status - -| Feature | Dev | Vercel | Production | -| --------------- | --- | ------ | ---------- | -| OpenML Login | ✅ | 🔄 | ⏳ CORS | -| Avatar/Initials | ✅ | ✅ | ✅ | -| Profile Display | ✅ | ✅ | ✅ | -| Logout | ✅ | ✅ | ✅ | -| GitHub OAuth | ⚠️ | ❌ | ⏳ Setup | -| Google OAuth | ❌ | ❌ | ⏳ Setup | -| Sign Up | ⚠️ | ❌ | ⏳ SMTP | - -**Legend**: ✅ Working | 🔄 Ready | ⚠️ Partial | ❌ Not configured | ⏳ Needs setup - ---- - -## Next Steps - -### Immediate (This Week) - -1. Push to Vercel: `git push origin clean-app-next-v2` -2. Backend: Add Vercel domain to CORS -3. Test login flow on Vercel -4. Document issues - -### Future (Before TU/e Production) - -1. Create GitHub OAuth app -2. Configure SMTP (SendGrid recommended) -3. Implement backend OAuth endpoints -4. Update database schema -5. 
Configure production secrets - ---- - -**Version**: 1.0 -**Updated**: Dec 18, 2025 -**Status**: Phase 1 ready for Vercel testing diff --git a/app-next/check_groups.js b/app-next/check_groups.js deleted file mode 100644 index 12b88397..00000000 --- a/app-next/check_groups.js +++ /dev/null @@ -1,17 +0,0 @@ -import { getDb } from "./src/lib/db.js"; - -async function checkGroupsSchema() { - const db = await getDb(); - if (process.env.MYSQL_HOST) { - console.log("MySQL Schema check not implemented yet via this script."); - } else { - try { - const info = db.prepare("PRAGMA table_info(users_groups)").all(); - console.log("users_groups table info:", info); - } catch (e) { - console.error("Error accessing users_groups:", e.message); - } - } -} - -checkGroupsSchema().catch(console.error); diff --git a/app-next/check_passkeys.js b/app-next/check_passkeys.js deleted file mode 100644 index 16b103e0..00000000 --- a/app-next/check_passkeys.js +++ /dev/null @@ -1,23 +0,0 @@ -import { getDb } from "./src/lib/db.js"; - -async function checkPasskeySchema() { - const db = await getDb(); - if (process.env.MYSQL_HOST) { - console.log("MySQL Schema check not implemented yet via this script."); - } else { - try { - const info = db.prepare("PRAGMA table_info(user_passkeys)").all(); - console.log("user_passkeys table info:", info); - - const count = db.prepare("SELECT COUNT(*) as count FROM user_passkeys").get(); - console.log("Passkey count:", count); - - const sample = db.prepare("SELECT id, user_id, device_name FROM user_passkeys LIMIT 1").get(); - console.log("Sample passkey (text fields):", sample); - } catch (e) { - console.error("Error accessing user_passkeys:", e.message); - } - } -} - -checkPasskeySchema().catch(console.error); diff --git a/app-next/check_users.js b/app-next/check_users.js deleted file mode 100644 index 79ccdb1e..00000000 --- a/app-next/check_users.js +++ /dev/null @@ -1,17 +0,0 @@ -import { getDb } from "./src/lib/db.js"; - -async function checkUser() { - const 
db = await getDb(); - if (process.env.MYSQL_HOST) { - console.log("MySQL Schema check not implemented yet via this script."); - } else { - try { - const users = db.prepare("SELECT id, username, email, created_on FROM users ORDER BY id DESC LIMIT 5").all(); - console.log("Latest users:", users); - } catch (e) { - console.error("Error accessing users:", e.message); - } - } -} - -checkUser().catch(console.error);