Initial commit.

2025-12-21 23:48:01 +01:00
commit 16c71a2c59
6 changed files with 645 additions and 0 deletions
--- a/main.py
+++ b/main.py
@ -0,0 +1,217 @@
+from typing import Any, Tuple
+import zipfile
+from ebooklib import epub
+from pathlib import Path
+from pydantic import BaseModel, TypeAdapter
+from argparse import ArgumentParser
+import json
+
+
+class Author(BaseModel):
+    """Author entry of a book; ``main`` registers ``name`` as an EPUB author."""
+    # Numeric identifier; not read anywhere in this file.
+    id: int
+    # Display name passed to ``book.add_author``.
+    name: str
+
+
+class BookMedia(BaseModel):
+    """Book-level metadata, validated from the ``media`` object of ``info.json``."""
+    # Not read in this file; presumably the primary/original title - TODO confirm.
+    name: str
+    # Used as the EPUB title and as the stem of the output file name.
+    rusName: str
+    # Not read in this file; presumably the English title - TODO confirm.
+    engName: str
+    # Not read in this file.
+    otherNames: list[str]
+    # Each author's name is added to the EPUB metadata.
+    authors: list[Author]
+    # Stored as the Dublin Core ``description`` of the EPUB.
+    summary: str
+
+
+class Branch(BaseModel):
+    """One branch record attached to a chapter."""
+    # Not read anywhere in this file.
+    id: int
+    # Appears as the ``-b<branchId>`` suffix of per-branch chapter archives.
+    branchId: int
+
+
+def process_text(text_obj: dict):
+    content = text_obj["text"]
+    for mark in text_obj.get("marks", []):
+        if mark["type"] == "italic":
+            content = f"<i>{content}</i>"
+        elif mark["type"] == "bold":
+            content = f"<b>{content}</b>"
+        else:
+            raise ValueError(f"unknown mark for text {text_obj}")
+    return content
+
+
+class Chapter(BaseModel):
+    id: int
+    volume: int
+    number: str
+    name: str
+    branches: list[Branch]
+    withBranches: bool
+
+    def get_zip_path(self, base_dir: Path) -> Path | None:
+        nobranch_zip = base_dir / f"v{self.volume}-n{self.number}-{self.id}.zip"
+        if nobranch_zip.exists():
+            return nobranch_zip
+        for branch in self.branches:
+            branch_zip = (
+                base_dir
+                / f"v{self.volume}-n{self.number}-{self.id}-b{branch.branchId}.zip"
+            )
+            if branch_zip.exists():
+                return branch_zip
+
+    def load(self, base_dir: Path) -> Tuple[epub.EpubHtml, list[Any]]:
+        title = f"Глава {self.number}"
+        book_item = epub.EpubHtml(
+            title=title,
+            file_name="Text/ch{}.xhtml".format(self.number),
+            lang="ru",
+        )
+
+        zip_path = self.get_zip_path(base_dir)
+        if zip_path is None:
+            raise FileNotFoundError(f"Chapter for `{self}` not found")
+        zip = zipfile.ZipFile(zip_path)
+        data = zip.read("data.txt")
+        try:
+            content = json.loads(data)
+        except Exception:
+            # It's not a json, so we just attach content and return.
+            data = data.decode("utf-8")
+            content = f"<h2>Глава {self.number}</h2>\n{data}"
+            book_item.content = content
+            return (book_item, [])
+
+        if content["type"] != "doc":
+            raise ValueError(f"{self} contains unknown document format")
+
+        output = []
+        extras = []
+
+        print(f"parsing {zip.filename}")
+        output.append(f"<h2>Глава {self.number}</h2>")
+        for item in content["content"]:
+            if item["type"] in {"paragraph", "heading"}:
+                inner = []
+                for sub_item in item.get("content", []):
+                    if sub_item["type"] == "text":
+                        inner.append(process_text(sub_item))
+                    elif sub_item["type"] == "hardBreak":
+                        inner.append("<br />")
+                    else:
+                        raise ValueError(f"{self} - Unknown sub-item")
+                inner_content = "\n".join(inner)
+                if item["type"] == "heading":
+                    attrs = item.get("attrs", {})
+                    level = attrs.get("level", 3)
+                    align = attrs.get("textAlign", "center")
+                    output.append(
+                        f'<h{level} align="{align}">{inner_content}</h{level}>'
+                    )
+                if item["type"] == "paragraph":
+                    output.append(f"<p>{inner_content}</p>")
+
+            elif item["type"] == "image":
+                for image in item["attrs"]["images"]:
+                    image_name = image["image"]
+                    image_path = f"{image_name}.png"
+                    image_item = epub.EpubImage(
+                        uid=image_name,
+                        file_name=image_path,
+                        content=zip.read(f"{image_name}.png"),
+                    )
+                    extras.append(image_item)
+                    output.append(f'<img src="../{image_path}" />')
+            elif item["type"] == "horizontalRule":
+                output.append("<hr/>")
+            else:
+                raise ValueError(f"{self} - unknown content type")
+
+        # Connect all items
+        book_item.content = "\n".join(output)
+
+        return (book_item, extras)
+
+    def add_to_book(self, book: epub.EpubBook, base_dir: Path):
+        (item, extras) = self.load(base_dir)
+        for extra in extras:
+            book.add_item(extra)
+        book.add_item(item)
+        book.spine.append(item)
+        book.toc.append(item)
+
+
+def parse_args():
+    parser = ArgumentParser()
+    parser.add_argument(
+        "--input-dir",
+        "-i",
+        dest="input",
+        type=Path,
+        help="Dumped book directory",
+        required=True,
+    )
+    parser.add_argument(
+        "--output-dir",
+        "-o",
+        dest="output",
+        type=Path,
+        default="output",
+        help="Where to put output EPUB files",
+    )
+    parser.add_argument(
+        "--volume",
+        type=int,
+        required=True,
+    )
+    parser.add_argument(
+        "--cover", "-c", type=Path, required=False, help="Path to cover image"
+    )
+    return parser.parse_args()
+
+
+def main():
+    args = parse_args()
+    cover: bytes | None = None
+    if args.cover is not None:
+        cover = args.cover.read_bytes()
+
+    info: BookMedia = BookMedia.model_validate(
+        json.load((args.input / "info.json").open())["media"]
+    )
+    chapters: list[Chapter] = list(
+        sorted(
+            filter(
+                # Filter volumes
+                lambda c: c.volume == args.volume,
+                TypeAdapter(list[Chapter]).validate_python(
+                    json.load((args.input / "chapters.json").open())
+                ),
+            ),
+            # Sort by chapter number
+            key=lambda c: float(c.number),
+        )
+    )
+    book = epub.EpubBook()
+    for author in info.authors:
+        book.add_author(author.name)
+    book.set_language("ru")
+    book.set_title(info.rusName)
+    book.add_metadata(namespace="DC", name="description", value=info.summary)
+
+    book.spine = []
+    if cover:
+        book.spine.insert(0, "cover")
+        book.set_cover(args.cover.name, cover)
+
+    for chapter in chapters:
+        chapter.add_to_book(book, args.input)
+
+    book.add_item(epub.EpubNcx())
+
+    output_path: Path = args.output / f"{info.rusName}-{args.volume}.epub"
+    output_path.parent.mkdir(parents=True, exist_ok=True)
+
+    epub.write_epub(str(output_path), book)
+
+
+# Run the converter only when executed as a script, not when imported.
+if __name__ == "__main__":
+    main()