Initial commit.
This commit is contained in:
217
main.py
Normal file
217
main.py
Normal file
@ -0,0 +1,217 @@
|
||||
from typing import Any, Tuple
|
||||
import zipfile
|
||||
from ebooklib import epub
|
||||
from pathlib import Path
|
||||
from pydantic import BaseModel, TypeAdapter
|
||||
from argparse import ArgumentParser
|
||||
import json
|
||||
|
||||
|
||||
class Author(BaseModel):
    """An author entry as listed under a book's ``authors`` metadata."""

    # Numeric identifier carried over from the dump; not used elsewhere in this script.
    id: int
    # Display name; this is the value registered as the EPUB author.
    name: str
|
||||
|
||||
|
||||
class BookMedia(BaseModel):
    """Book-level metadata, validated from the ``media`` object of ``info.json``."""

    name: str
    # Russian title; used as the EPUB title and as part of the output file name.
    rusName: str
    engName: str
    otherNames: list[str]
    # Each author's name is added to the EPUB metadata.
    authors: list[Author]
    # Written to the EPUB as the Dublin Core "description" field.
    summary: str
|
||||
|
||||
|
||||
class Branch(BaseModel):
    """A branch record attached to a chapter."""

    id: int
    # Appears as the ``-b<branchId>`` suffix in the chapter archive file name.
    branchId: int
|
||||
|
||||
|
||||
def process_text(text_obj: dict) -> str:
    """Render one text node of a chapter document as an XHTML fragment.

    Args:
        text_obj: Node with a ``"text"`` string and an optional ``"marks"``
            list whose entries each carry a ``"type"`` of ``"italic"`` or
            ``"bold"``.

    Returns:
        The escaped text, wrapped in ``<i>``/``<b>`` once per mark. Marks are
        applied in list order, so the first mark ends up innermost.

    Raises:
        ValueError: If a mark has a type other than ``"italic"`` or ``"bold"``.
    """
    # Imported locally so this function is self-contained.
    import html

    # The node text is plain text headed for XHTML: a literal "<" or "&"
    # would otherwise produce malformed markup.
    content = html.escape(text_obj["text"], quote=False)
    for mark in text_obj.get("marks", []):
        mark_type = mark["type"]
        if mark_type == "italic":
            content = f"<i>{content}</i>"
        elif mark_type == "bold":
            content = f"<b>{content}</b>"
        else:
            raise ValueError(f"unknown mark for text {text_obj}")
    return content
|
||||
|
||||
|
||||
class Chapter(BaseModel):
    """A chapter record that can locate its dumped archive and render itself for EPUB.

    The chapter body is read from a zip archive in the dump directory. The
    archive must contain a ``data.txt`` member and, for every image node in
    the document, a ``<image>.png`` member.
    """

    id: int
    volume: int
    # Kept as a string: it is interpolated verbatim into archive and XHTML file names.
    number: str
    name: str
    branches: list[Branch]
    withBranches: bool

    def get_zip_path(self, base_dir: Path) -> Path | None:
        """Return the path of this chapter's archive inside *base_dir*.

        The branch-less name ``v<volume>-n<number>-<id>.zip`` is tried first,
        then ``v<volume>-n<number>-<id>-b<branchId>.zip`` for each branch in
        order.

        Returns:
            The first candidate that exists, or ``None`` if none does.
        """
        stem = f"v{self.volume}-n{self.number}-{self.id}"
        candidates = [base_dir / f"{stem}.zip"]
        candidates.extend(
            base_dir / f"{stem}-b{branch.branchId}.zip" for branch in self.branches
        )
        return next((path for path in candidates if path.exists()), None)

    def _render_inline(self, item: dict) -> str:
        """Render the inline children of a paragraph or heading node.

        Raises:
            ValueError: If a child is neither ``"text"`` nor ``"hardBreak"``.
        """
        fragments = []
        for sub_item in item.get("content", []):
            sub_type = sub_item["type"]
            if sub_type == "text":
                fragments.append(process_text(sub_item))
            elif sub_type == "hardBreak":
                fragments.append("<br />")
            else:
                raise ValueError(f"{self} - Unknown sub-item")
        # NOTE(review): a newline between inline runs renders as whitespace in
        # HTML, so a mark change in the middle of a word shows a gap — confirm
        # this separator is intended.
        return "\n".join(fragments)

    def load(self, base_dir: Path) -> Tuple[epub.EpubHtml, list[Any]]:
        """Read the chapter archive and build its EPUB page.

        If ``data.txt`` is not valid JSON it is treated as ready-made markup
        and attached under a chapter heading. Otherwise it must be an object
        with ``"type": "doc"`` whose ``"content"`` nodes are rendered one by
        one (``paragraph``, ``heading``, ``image``, ``horizontalRule``).

        Args:
            base_dir: Directory that holds the chapter archives.

        Returns:
            ``(page, extras)`` where *page* is the chapter's ``EpubHtml`` item
            and *extras* are the ``EpubImage`` items referenced by it.

        Raises:
            FileNotFoundError: If no archive for this chapter exists.
            ValueError: If the document or one of its nodes has an unknown type.
        """
        # "Глава" is Russian for "Chapter"; the same text is used for the
        # item title and the visible heading.
        title = f"Глава {self.number}"
        book_item = epub.EpubHtml(
            title=title,
            file_name="Text/ch{}.xhtml".format(self.number),
            lang="ru",
        )

        zip_path = self.get_zip_path(base_dir)
        if zip_path is None:
            raise FileNotFoundError(f"Chapter for `{self}` not found")

        # The archive stays open for the whole parse because image members
        # are read lazily while walking the document.
        with zipfile.ZipFile(zip_path) as archive:
            data = archive.read("data.txt")
            try:
                content = json.loads(data)
            except ValueError:
                # Not JSON (JSONDecodeError and UnicodeDecodeError are both
                # ValueError subclasses): attach the payload as-is and return.
                book_item.content = f"<h2>{title}</h2>\n{data.decode('utf-8')}"
                return (book_item, [])

            if not isinstance(content, dict) or content.get("type") != "doc":
                raise ValueError(f"{self} contains unknown document format")

            output = []
            extras = []

            print(f"parsing {archive.filename}")
            output.append(f"<h2>{title}</h2>")
            for item in content["content"]:
                item_type = item["type"]
                if item_type in {"paragraph", "heading"}:
                    inner_content = self._render_inline(item)
                    if item_type == "heading":
                        attrs = item.get("attrs", {})
                        level = attrs.get("level", 3)
                        align = attrs.get("textAlign", "center")
                        output.append(
                            f'<h{level} align="{align}">{inner_content}</h{level}>'
                        )
                    else:
                        output.append(f"<p>{inner_content}</p>")
                elif item_type == "image":
                    for image in item["attrs"]["images"]:
                        image_name = image["image"]
                        image_path = f"{image_name}.png"
                        extras.append(
                            epub.EpubImage(
                                uid=image_name,
                                file_name=image_path,
                                content=archive.read(image_path),
                            )
                        )
                        # The page is written under "Text/", so the image is
                        # referenced one directory up.
                        output.append(f'<img src="../{image_path}" />')
                elif item_type == "horizontalRule":
                    output.append("<hr/>")
                else:
                    raise ValueError(f"{self} - unknown content type")

        # Connect all items
        book_item.content = "\n".join(output)

        return (book_item, extras)

    def add_to_book(self, book: epub.EpubBook, base_dir: Path):
        """Load this chapter and register it, with its images, in *book*.

        The page is appended to both the spine and the table of contents.
        """
        (item, extras) = self.load(base_dir)
        for extra in extras:
            book.add_item(extra)
        book.add_item(item)
        book.spine.append(item)
        book.toc.append(item)
|
||||
|
||||
|
||||
def parse_args():
|
||||
parser = ArgumentParser()
|
||||
parser.add_argument(
|
||||
"--input-dir",
|
||||
"-i",
|
||||
dest="input",
|
||||
type=Path,
|
||||
help="Dumped book directory",
|
||||
required=True,
|
||||
)
|
||||
parser.add_argument(
|
||||
"--output-dir",
|
||||
"-o",
|
||||
dest="output",
|
||||
type=Path,
|
||||
default="output",
|
||||
help="Where to put output EPUB files",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--volume",
|
||||
type=int,
|
||||
required=True,
|
||||
)
|
||||
parser.add_argument(
|
||||
"--cover", "-c", type=Path, required=False, help="Path to cover image"
|
||||
)
|
||||
return parser.parse_args()
|
||||
|
||||
|
||||
def main():
    """Build an EPUB for one volume of a dumped book.

    Reads ``info.json`` and ``chapters.json`` from the input directory, keeps
    the chapters of the requested volume ordered by chapter number, renders
    each into the book and writes ``<rusName>-<volume>.epub`` to the output
    directory.

    Raises:
        SystemExit: If the dump contains no chapter for the requested volume.
    """
    args = parse_args()
    cover: bytes | None = None
    if args.cover is not None:
        cover = args.cover.read_bytes()

    # Read as bytes and let json detect the encoding: this closes the files
    # promptly and does not depend on the platform's default text encoding.
    info_raw = json.loads((args.input / "info.json").read_bytes())
    info: BookMedia = BookMedia.model_validate(info_raw["media"])

    chapters_raw = json.loads((args.input / "chapters.json").read_bytes())
    all_chapters = TypeAdapter(list[Chapter]).validate_python(chapters_raw)
    chapters: list[Chapter] = sorted(
        # Filter volumes
        (c for c in all_chapters if c.volume == args.volume),
        # Sort by chapter number
        key=lambda c: float(c.number),
    )
    if not chapters:
        raise SystemExit(
            f"No chapters found for volume {args.volume} in {args.input}"
        )

    book = epub.EpubBook()
    for author in info.authors:
        book.add_author(author.name)
    book.set_language("ru")
    book.set_title(info.rusName)
    book.add_metadata(namespace="DC", name="description", value=info.summary)

    book.spine = []
    if cover:
        book.spine.insert(0, "cover")
        book.set_cover(args.cover.name, cover)

    for chapter in chapters:
        chapter.add_to_book(book, args.input)

    book.add_item(epub.EpubNcx())

    output_path: Path = args.output / f"{info.rusName}-{args.volume}.epub"
    output_path.parent.mkdir(parents=True, exist_ok=True)

    epub.write_epub(str(output_path), book)
|
||||
|
||||
|
||||
# Run the converter only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user