FanFicDev · mscout1 · Feb 1, 2025 · iridescent-beacon · Feb 25, 2025
diff --git a/adapter/__init__.py b/adapter/__init__.py
@@ -15,6 +15,7 @@
 from adapter.fictionPressAdapter import FictionPressAdapter
 from adapter.hpFanficArchiveAdapter import HpFanficArchiveAdapter
 from adapter.hpffAdapter import HarryPotterFanfictionAdapter
+from adapter.mcstoriesAdapter import McStoriesAdapter
 from adapter.parahumansAdapter import ParahumansAdapter
 from adapter.portkeyArchiveAdapter import PortkeyArchiveAdapter
 from adapter.questionableQuestingAdapter import QuestionableQuestingAdapter
@@ -45,6 +46,7 @@ def registerAdapters() -> None:
     adapters[FicType.sufficientvelocity] = SufficientVelocityAdapter()
     adapters[FicType.questionablequesting] = QuestionableQuestingAdapter()
     adapters[FicType.harrypotterfanfiction] = HarryPotterFanfictionAdapter()
+    adapters[FicType.mcstories] = McStoriesAdapter()
     adapters[FicType.parahumans] = ParahumansAdapter()
     adapters[FicType.adultfanfiction] = AdultFanfictionAdapter()
     adapters[FicType.fanficsme] = FanficsMeAdapter()

diff --git a/adapter/mcstoriesAdapter.py b/adapter/mcstoriesAdapter.py
@@ -0,0 +1,182 @@
+import re
+import urllib.parse
+from typing import Optional
+
+from bs4 import BeautifulSoup
+
+import scrape
+import util
+from adapter.adapter import Adapter, edumpContent
+from htypes import FicType, FicId
+from schema import OilTimestamp
+from store import Fic, Language
+from store import FicStatus
+
+story_index_path_re = re.compile(r"/(?P<lid>\w+)/index\.html")  # story index page: /<lid>/index.html
+story_chapter_path_re = re.compile(r"/(?P<lid>\w+)/(?P<clid>\w+)\.html")  # chapter page: /<lid>/<clid>.html (also matches index.html)
+relative_chapter_path_re = re.compile(r"(?P<clid>\w+)\.html")  # chapter href without a directory: <clid>.html
+
+
+def strip_nonnumeric(s):  # keep only the digit characters of s, in their original order
+    return ''.join(filter(lambda ch: ch.isdigit(), s))
+
+
+class McStoriesAdapter(Adapter):
+    """Adapter for stories hosted on mcstories.com.
+
+    A story's index page is /<lid>/index.html and a chapter is /<lid>/<clid>.html.
+    """
+
+    def __init__(self) -> None:
+        super().__init__(
+            True, "https://mcstories.com/", "mcstories.com", FicType.mcstories, "mcstories"
+        )
+
+    def tryParseUrl(self, url: str) -> Optional[FicId]:
+        """Return the FicId for a story index or chapter url, or None for any other path."""
+        path = urllib.parse.urlparse(url).path
+        # Both url shapes carry the story id as the first path segment; the
+        # chapter branch previously returned the chapter slug (clid) instead.
+        m = story_index_path_re.fullmatch(path) or story_chapter_path_re.fullmatch(path)
+        if m is None:
+            return None
+        return FicId(self.ftype, m.group("lid"))
+
+    def constructUrl(self, fic, chapter=None):
+        """Return the absolute url of fic's index page, or of chapter when one is given."""
+        # NOTE(review): parseInfoInto stores the chapter slug as localChapterId;
+        # confirm chapter.localId is the intended attribute here.
+        page = "index" if chapter is None else chapter.localId
+        return urllib.parse.urljoin(self.baseUrl, f"/{fic.localId}/{page}.html")
+
+    def create(self, fic: Fic) -> Fic:
+        """Scrape fic's index page, fill fic from it, upsert it and return it."""
+        fic.url = self.constructUrl(fic)
+        data = scrape.scrape(fic.url)
+        edumpContent(data["raw"], "mcstories")
+        fic = self.parseInfoInto(fic, data["raw"])
+        fic.upsert()
+        return fic
+
+    def absolute_author_link(self, rel_author_link):
+        """Join an author href with baseUrl, dropping a leading '..' first."""
+        # Index pages live at /<lid>/index.html, so '../x' there means '/x'.
+        if rel_author_link.startswith('..'):
+            rel_author_link = rel_author_link[len('..'):]
+        return urllib.parse.urljoin(self.baseUrl, rel_author_link)
+
+    def absolute_chapter_link(self, fic, rel_chapter_link):
+        """Return the absolute url for a chapter href found on fic's index page."""
+        return urllib.parse.urljoin(self.baseUrl, f"{fic.localId}/{rel_chapter_link}")
+
+    def get_chapter_meta(self, table, fic):
+        """Return one metadata dict per chapter row of an index table.
+
+        Each dict has the keys clid, title, chapter_link, words, published and
+        updated; a missing or blank 'Updated' cell falls back to the 'Added' date.
+        """
+        chapters = []
+        for row in util.dicts_from_table(table):
+            name_cell = row['Chapter']
+            date_cell = row['Added']
+            updated_cell = row.get('Updated', date_cell)
+            relative_chapter_path = name_cell.find('a')['href']
+            clid = relative_chapter_path_re.fullmatch(relative_chapter_path).group('clid')
+            published = util.parseDateAsUnix(date_cell.string, fic.fetched)
+            updated = published
+            if updated_cell.string and updated_cell.string.strip():
+                updated = util.parseDateAsUnix(updated_cell.string, fic.fetched)
+
+            chapters.append({
+                'clid': clid,
+                'title': name_cell.string,
+                'chapter_link': self.absolute_chapter_link(fic, relative_chapter_path),
+                'words': int(strip_nonnumeric(row['Length'].string)),
+                'published': published,
+                'updated': updated,
+            })
+        return chapters
+
+    def parseInfoInto(self, fic: Fic, html: str):
+        """Fill fic and its chapters from the html of the story's index page.
+
+        Sets fic's metadata and author, upserts fic, then upserts one chapter per
+        listed chapter with its url, localChapterId and title. Returns fic.
+        """
+        soup = BeautifulSoup(html, "html.parser")
+
+        fic.fetched = OilTimestamp.now()
+        fic.languageId = Language.getId("English")  # All stories on mcstories are presumed english
+        fic.title = soup.find(class_="title").string.strip()
+        fic.description = soup.find(class_="synopsis").get_text().strip()
+        fic.ageRating = "M"  # *EROTIC* Mind Control Story Archive
+
+        # The first dateline is "Added <date>"; an optional second is "Updated <date>".
+        date_strings = soup.find_all('h3', class_='dateline')
+        published_date_string = date_strings.pop(0).string[len("Added "):]  # "Added 18 October 2014"
+        if date_strings:
+            updated_date_string = date_strings.pop(0).string[len("Updated "):]  # "Updated 18 October 2014"
+        else:
+            updated_date_string = published_date_string
+        publishedUts = util.parseDateAsUnix(published_date_string, fic.fetched)
+        updatedUts = util.parseDateAsUnix(updated_date_string, fic.fetched)
+        fic.published = OilTimestamp(publishedUts)
+
+        chapter_table = soup.find('table', class_='index')
+        if chapter_table:
+            chapters = self.get_chapter_meta(chapter_table, fic)
+        else:
+            # No index table: the only chapter is linked from a div.chapter, with
+            # its word count in the text that follows the link.
+            link = soup.find('div', class_='chapter').find('a')
+            relative_chapter_path = link['href']
+            chapters = [{
+                'clid': relative_chapter_path_re.fullmatch(relative_chapter_path).group('clid'),
+                'title': link.string,
+                'chapter_link': self.absolute_chapter_link(fic, relative_chapter_path),
+                'words': int(strip_nonnumeric(link.next_sibling)),
+                'published': publishedUts,
+                'updated': publishedUts,
+            }]
+
+        fic.chapterCount = len(chapters)
+        fic.wordCount = sum(chapter['words'] for chapter in chapters)
+        fic.reviewCount = 0
+        fic.favoriteCount = 0
+        fic.followCount = 0
+
+        # The newest of the story-level date and every chapter's update date is
+        # the best estimate of this story's last update.
+        fic.updated = OilTimestamp(max([updatedUts] + [chapter['updated'] for chapter in chapters]))
+        fic.ficStatus = FicStatus.ongoing  # TODO: No indication on this site.
+
+        author_anchor = soup.find("h3", class_="byline").find("a")
+        author = author_anchor.string
+        # The displayed name doubles as the author id (map pseudo to real?)
+        self.setAuthor(fic, author, self.absolute_author_link(author_anchor['href']), author)
+        fic.upsert()
+
+        for cid, chapter_meta in enumerate(chapters, 1):
+            chap = fic.chapter(cid)
+            chap.url = chapter_meta['chapter_link']
+            chap.localChapterId = chapter_meta['clid']
+            chap.title = chapter_meta['title']
+            chap.upsert()
+
+        return fic
+
+    def extractContent(self, fic: Fic, html: str) -> str:
+        """Return the <article> element of html as a string.
+
+        Raises Exception, after e-dumping html, when there is no <article>.
+        """
+        article = BeautifulSoup(html, "html.parser").find('article')
+        if article is None:
+            edumpContent(html, "mcstories_ec")
+            raise Exception("unable to find chapters, e-dumped")
+        return str(article)
+
+    def getCurrentInfo(self, fic: Fic) -> Fic:
+        """Re-scrape fic's index page and re-run parseInfoInto on it."""
+        fic.url = self.constructUrl(fic)
+        return self.parseInfoInto(fic, scrape.scrape(fic.url)["raw"])
diff --git a/alexandria_api_requirements.txt b/alexandria_api_requirements.txt
@@ -8,7 +8,7 @@ idna==2.10
 itsdangerous==2.0.0
 Jinja2==3.0.0
 MarkupSafe==2.0.0
-psycopg2==2.9.9
+psycopg2-binary==2.9.9
 python-dateutil==2.8.1
 requests==2.31.0
 six==1.16.0

diff --git a/htypes.py b/htypes.py
@@ -36,6 +36,7 @@ class FicType(IntEnum):
     fanficparadisesfw = 24
     fanficparadisensfw = 25
     wanderinginn = 26
+    mcstories = 27
 
 
 def adaptFicType(ftype: FicType) -> AsIs:

diff --git a/sql/minerva/addSources.sql b/sql/minerva/addSources.sql
@@ -24,6 +24,7 @@ insert into source(url, name, description) values
 	('https://thefanfictionforum.net/xenforo/index.php', 'The Fanfiction Forum', 'The Fanfiction Forum'),
 	('https://www.fanficparadise.com/fpforum-sfw/index.php', 'Fanfiction Paradise SFW', 'Fanfiction Paradise SFW'),
 	('https://www.fanficparadise.com/fpforum-nsfw/index.php', 'Fanfiction Paradise NSFW', 'Fanfiction Paradise NSFW'),
-	('https://wanderinginn.com/', 'The Wandering Inn', 'A tale of a girl, an inn, and a world full of levels')
+	('https://wanderinginn.com/', 'The Wandering Inn', 'A tale of a girl, an inn, and a world full of levels'),
+	('https://mcstories.com/', 'The Erotic Mind-Control Story Archive', 'An archive of erotic mind control stories')
 on conflict do nothing;
 
diff --git a/util.py b/util.py
@@ -353,3 +353,13 @@ def decodeCloudFlareEmail(email: str) -> str:
     octets = [int(email[i : i + 2], 16) for i in range(0, len(email), 2)]
     key, ebytes = octets[0], octets[1:]
     return "".join([chr(o ^ key) for o in ebytes])
+
+
+def dicts_from_table(bs_table):
+    """Return one {header text: <td> cell} dict per data row of a bs4 table."""
+    headers = [header.text for header in bs_table.find_all('th')]
+    # zip() instead of headers[i]: a row with more <td> cells than there are <th>
+    # headers used to raise IndexError; the surplus cells are now ignored.
+    rows = (dict(zip(headers, row.find_all('td'))) for row in bs_table.find_all('tr'))
+    # Rows without any <td> (such as the header row) give an empty dict; drop them.
+    return [row for row in rows if row]