Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions cms/envs/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -898,6 +898,9 @@ def make_lms_template_path(settings):

'openedx_events',

# Core models to represent courses
"openedx_catalog",

# Core apps that power libraries
"openedx_content",
*openedx_content_backcompat_apps_to_install(),
Expand Down
3 changes: 3 additions & 0 deletions lms/envs/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -2020,6 +2020,9 @@

'openedx_events',

# Core models to represent courses
"openedx_catalog",

# Core apps that power libraries
"openedx_content",
*openedx_content_backcompat_apps_to_install(),
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
"""
Data migration to populate the new CourseRun and CatalogCourse models.
"""

# Generated by Django 5.2.11 on 2026-02-13 21:47
import logging

from django.conf import settings
from django.db import migrations
from organizations.api import ensure_organization, exceptions as org_exceptions

log = logging.getLogger(__name__)

# https://github.com/openedx/openedx-platform/issues/38036
# Maps course language codes to the normalized code that this migration stores on CatalogCourse.
# Keys must be lower-case and hyphen-separated: backfill_openedx_catalog() lower-cases the CourseOverview language and
# replaces "_" with "-" before looking it up here. Codes that are not listed are stored unchanged.
# Note that "ca@valencia" has to be mapped, otherwise it would fail the migration's validity check (which requires the
# third character of any code longer than two characters to be "-") and be replaced by the default language.
NORMALIZE_LANGUAGE_CODES = {
    "zh-hans": "zh-cn",
    "zh-hant": "zh-hk",
    "ca@valencia": "ca-es-valencia",
}


def _resolve_catalog_language(course_overview, course_key) -> str:
    """
    Work out the language code to store for the course described by ``course_overview``.

    Returns ``settings.LANGUAGE_CODE`` when there is no CourseOverview, when it has no language set, or when the
    language does not look like a valid code (in which case a warning mentioning ``course_key`` is logged).
    Otherwise returns the overview's language, lower-cased, with "_" replaced by "-", and passed through
    NORMALIZE_LANGUAGE_CODES.
    """
    # Note that in Studio, the options for course language generally came from the ALL_LANGUAGES setting, which is
    # mostly two-letter language codes with no locale, except it uses "zh_HANS" for Mandarin and "zh_HANT" for
    # Cantonese. We normalize those to "zh-cn" and "zh-hk" for consistency with our platform UI languages /
    # Transifex, but you can still access the "old" version using the CatalogCourse.language_short
    # getter/setter for backwards compatibility. See https://github.com/openedx/openedx-platform/issues/38036
    fallback = settings.LANGUAGE_CODE
    if not (course_overview and course_overview.language):
        return fallback

    # Lower-case and use hyphens for consistency (`en-us` not `en_US`), then map known aliases.
    hyphenated = course_overview.language.lower().replace("_", "-")
    normalized = NORMALIZE_LANGUAGE_CODES.get(hyphenated, hyphenated)

    # Accept "xx" as well as "xx-...": anything longer than two characters needs a hyphen in third position.
    if len(normalized) <= 2 or normalized[2] == "-":
        return normalized

    log.warning(
        'The course with ID "%s" has invalid language "%s" - using default language "%s" instead.',
        course_key,
        normalized,
        settings.LANGUAGE_CODE,
    )
    return fallback


def backfill_openedx_catalog(apps, schema_editor):
    """
    Create a CatalogCourse and CourseRun for every course listed in the split modulestore course index.

    For each course index entry (non-library, "mongodb" base store) this:
      * makes sure the Organization exists (raising ValueError if it cannot be found or auto-created),
      * get-or-creates the CatalogCourse for (organization, course code),
      * get-or-creates the CourseRun for the course key, and
      * back-dates the "created" timestamps using the CourseOverview, when one exists.
    """
    # CourseOverview is only a cache derived from modulestore, so the course index (the source of truth) drives the
    # loop and CourseOverview is merely consulted for the display name, language and creation date.
    IndexModel = apps.get_model("split_modulestore_django", "SplitModulestoreCourseIndex")
    OverviewModel = apps.get_model("course_overviews", "CourseOverview")
    CatalogCourseModel = apps.get_model("openedx_catalog", "CatalogCourse")
    CourseRunModel = apps.get_model("openedx_catalog", "CourseRun")

    # Primary keys of the CatalogCourse rows that this migration run created (as opposed to pre-existing rows).
    pks_created_here: set[int] = set()

    index_entries = IndexModel.objects.filter(base_store="mongodb", library_version="").order_by("course_id")
    for index_entry in index_entries:
        course_key = index_entry.course_id
        org_code: str = course_key.org
        course_code: str = course_key.course
        run_code: str = course_key.run

        # --- Organization -------------------------------------------------------------------------------------------
        try:
            org_data = ensure_organization(org_code)
        except org_exceptions.InvalidOrganizationException as exc:
            # Only happens if the org is used by a modulestore course, is missing from the Organizations table, and
            # auto-creation of organizations is disabled. The operator has to decide how to resolve that.
            raise ValueError(
                f'The organization short code "{org_code}" exists in modulestore ({course_key}) but '
                "not the Organizations table, and auto-creating organizations is disabled. You can resolve this by "
                "creating the Organization manually (e.g. from the Django admin) or turning on auto-creation. "
                "You can set active=False to prevent this Organization from being used other than for historical data. "
            ) from exc

        org_short_name = org_data["short_name"]
        if org_short_name != org_code:
            # On most installations, the 'short_name' database column is case insensitive (unfortunately)
            log.warning(
                'The course with ID "%s" does not match its Organization.short_name "%s"',
                course_key,
                org_short_name,
            )

        # --- CourseOverview (optional) ------------------------------------------------------------------------------
        try:
            course_overview = OverviewModel.objects.get(id=course_key)
        except OverviewModel.DoesNotExist:
            # The course exists in modulestore but its details have not been cached into CourseOverview yet.
            course_overview = None

        overview_name = course_overview.display_name if course_overview else None
        display_name: str = overview_name or course_code
        language = _resolve_catalog_language(course_overview, course_key)

        # --- CatalogCourse ------------------------------------------------------------------------------------------
        catalog_course, catalog_course_created = CatalogCourseModel.objects.get_or_create(
            org_id=org_data["id"],
            course_code=course_code,
            defaults={"display_name": display_name, "language": language},
        )
        if catalog_course_created:
            pks_created_here.add(catalog_course.pk)
        elif (
            catalog_course.pk in pks_created_here
            and overview_name
            and overview_name != catalog_course.display_name
            and catalog_course.display_name != course_code
        ):
            # An earlier run in this same migration created the CatalogCourse, and this run has a different name:
            # fall back to the course code as the common catalog course name.
            catalog_course.display_name = course_code
            catalog_course.save(update_fields=["display_name"])

        if catalog_course.course_code != course_code:
            raise ValueError(
                f"The course {course_key} exists in modulestore with a different capitalization of its "
                f'course code compared to other instances of the same run ("{course_code}" vs '
                f'"{catalog_course.course_code}"). '
                "This really should not happen. To fix it, delete the inconsistent course runs (!). "
            )

        # --- CourseRun ----------------------------------------------------------------------------------------------
        course_run, run_created = CourseRunModel.objects.get_or_create(
            catalog_course=catalog_course,
            run_code=run_code,
            course_key=course_key,
            defaults={"display_name": display_name},
        )

        # --- "created" timestamps -----------------------------------------------------------------------------------
        # "created" has auto_now_add=True, so the only way to override it is a queryset update(). Without a
        # CourseOverview there is nothing better than the default (now), so move on to the next course.
        if not course_overview:
            continue

        overview_created = course_overview.created
        if overview_created < catalog_course.created and catalog_course.pk in pks_created_here:
            # Keep the 'created' date of the oldest course run processed for this CatalogCourse.
            CatalogCourseModel.objects.filter(pk=catalog_course.pk).update(created=overview_created)
        if run_created:
            CourseRunModel.objects.filter(pk=course_run.pk).update(created=overview_created)


class Migration(migrations.Migration):
    """
    Data migration that runs backfill_openedx_catalog to populate the CatalogCourse and CourseRun models.
    """

    # backfill_openedx_catalog loads models from each of these apps via apps.get_model(), so the listed migrations
    # must have been applied before this one runs.
    dependencies = [
        ("openedx_catalog", "0001_initial"),
        ("course_overviews", "0029_alter_historicalcourseoverview_options"),
        ("split_modulestore_django", "0003_alter_historicalsplitmodulestorecourseindex_options"),
    ]

    operations = [
        # Reversing this migration is a no-op: nothing is undone when migrating backwards.
        migrations.RunPython(backfill_openedx_catalog, reverse_code=migrations.RunPython.noop),
    ]
60 changes: 59 additions & 1 deletion openedx/core/djangoapps/content/course_overviews/signals.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,15 @@
Signal handler for invalidating cached course overviews
"""


import logging

from django.db import transaction
from django.db.models.signals import post_save
from django.dispatch import Signal
from django.dispatch.dispatcher import receiver

from openedx_catalog import api as catalog_api
from openedx_catalog.models_api import CourseRun
from openedx.core.djangoapps.signals.signals import COURSE_CERT_DATE_CHANGE
from xmodule.data import CertificatesDisplayBehaviors
from xmodule.modulestore.django import SignalHandler
Expand All @@ -33,6 +34,8 @@ def _listen_for_course_publish(sender, course_key, **kwargs): # pylint: disable
"""
Catches the signal that a course has been published in Studio and updates the corresponding CourseOverview cache
entry.

Also sync course data to the openedx_catalog CourseRun model.
"""
try:
previous_course_overview = CourseOverview.objects.get(id=course_key)
Expand All @@ -41,6 +44,51 @@ def _listen_for_course_publish(sender, course_key, **kwargs): # pylint: disable
updated_course_overview = CourseOverview.load_from_module_store(course_key)
_check_for_course_changes(previous_course_overview, updated_course_overview)

# Currently, SplitModulestoreCourseIndex is the ultimate source of truth for
# which courses exist. When a course is published, we sync that data to
# CourseOverview, and from CourseOverview to CourseRun.

# In the future, CourseRun will be the "source of truth" and each CourseRun
# may optionally point to content and get synced to CourseOverview.

# Ensure a CourseRun exists for this course
try:
course_run = catalog_api.get_course_run(course_key)
except CourseRun.DoesNotExist:
# Presumably this is a newly-created course. Create the CourseRun.
course_run = catalog_api.create_course_run_for_modulestore_course_with(
course_key=course_key,
display_name=updated_course_overview.display_name,
language_short=updated_course_overview.language,
)

# Keep the CourseRun up to date as the course is edited:
if updated_course_overview.display_name != course_run.display_name:
catalog_api.sync_course_run_details(course_key, display_name=updated_course_overview.display_name)
# If this course is the only run in the CatalogCourse, should we update the display_name of
# the CatalogCourse to match the run's new name? Currently the only way to edit the name of
# a CatalogCourse is via the Django admin. But it's also not used anywhere yet.

if (
updated_course_overview.language
and updated_course_overview.language != course_run.catalog_course.language_short
):
if course_run.catalog_course.runs.count() == 1:
# This is the only run in this CatalogCourse. Update the language of the CatalogCourse
catalog_api.update_catalog_course(
course_run.catalog_course,
language_short=updated_course_overview.language,
)
else:
LOG.warning(
'Course run "%s" language "%s" does not match its catalog course language, "%s"',
str(course_key),
updated_course_overview.language,
course_run.catalog_course.language_short,
)

# In the future, this will also sync schedule and other metadata to the CourseRun's related models


@receiver(SignalHandler.course_deleted)
def _listen_for_course_delete(sender, course_key, **kwargs): # pylint: disable=unused-argument
Expand All @@ -56,6 +104,16 @@ def _listen_for_course_delete(sender, course_key, **kwargs): # pylint: disable=
sender=None,
courserun_key=courserun_key,
)
# Delete the openedx_catalog CourseRun to keep it in sync:
try:
course_run_obj = catalog_api.get_course_run(course_key)
except CourseRun.DoesNotExist:
pass
else:
catalog_course = course_run_obj.catalog_course
catalog_api.delete_course_run(course_key)
if catalog_course.runs.count() == 0:
catalog_api.delete_catalog_course(catalog_course)


@receiver(post_save, sender=CourseOverview)
Expand Down
Loading
Loading