Added translators mention.

This commit is contained in:
2025-12-23 00:37:09 +01:00
parent 57dca56789
commit ad86960c23

59
main.py
View File

@ -10,6 +10,7 @@ import json
import re import re
from requests_tor import RequestsTor from requests_tor import RequestsTor
import requests import requests
import hashlib
IMAGE_REMOVE_REGEX = re.compile(r"""<img\s+.*src\s*=\s*\"(?P<src>https?.*)\".*/>""") IMAGE_REMOVE_REGEX = re.compile(r"""<img\s+.*src\s*=\s*\"(?P<src>https?.*)\".*/>""")
@ -28,9 +29,17 @@ class BookMedia(BaseModel):
summary: str summary: str
class BranchTeam(BaseModel):
    """A team attached to a chapter branch; only its name is modelled."""

    # Team name; this is the value Chapter.get_translators returns.
    name: str
class Branch(BaseModel):
    """One branch of a chapter together with the teams attached to it."""

    id: int
    # Used as the ``-b{branchId}`` suffix of per-branch chapter archive names.
    branchId: int
    # Teams whose names are reported as the translators of this branch.
    teams: list[BranchTeam]
# Maps the MD5 digest of a downloaded image's bytes to the in-book path of an
# image with that digest; consulted so that a byte-identical image already
# registered can be referenced again instead of being embedded a second time.
IMG_HASHES: dict[bytes, str] = {}
def process_text(text_obj: dict): def process_text(text_obj: dict):
@ -53,6 +62,18 @@ class Chapter(BaseModel):
branches: list[Branch] branches: list[Branch]
withBranches: bool withBranches: bool
def get_translators(self, base_dir: Path) -> list[str]:
    """Return the names of the teams credited for this chapter.

    A chapter with exactly one branch is credited to that branch's teams
    without looking at the filesystem. Otherwise the branches are scanned
    in order and the first one whose archive
    ``v{volume}-n{number}-{id}-b{branchId}.zip`` exists under *base_dir*
    supplies the names.

    Args:
        base_dir: Directory that holds the chapter archives.

    Returns:
        Team names of the selected branch, or an empty list when no branch
        could be selected.
    """
    candidates = self.branches
    if len(candidates) == 1:
        chosen = candidates[0]
    else:
        # Lazily probe each branch archive; stop at the first one on disk.
        chosen = next(
            (
                branch
                for branch in candidates
                if (
                    base_dir
                    / f"v{self.volume}-n{self.number}-{self.id}-b{branch.branchId}.zip"
                ).exists()
            ),
            None,
        )

    if chosen is None:
        return []
    return [team.name for team in chosen.teams]
def get_zip_path(self, base_dir: Path) -> Path | None: def get_zip_path(self, base_dir: Path) -> Path | None:
nobranch_zip = base_dir / f"v{self.volume}-n{self.number}-{self.id}.zip" nobranch_zip = base_dir / f"v{self.volume}-n{self.number}-{self.id}.zip"
if nobranch_zip.exists(): if nobranch_zip.exists():
@ -83,6 +104,7 @@ class Chapter(BaseModel):
if zip_path is None: if zip_path is None:
raise FileNotFoundError(f"Chapter for `{self}` not found") raise FileNotFoundError(f"Chapter for `{self}` not found")
zip = zipfile.ZipFile(zip_path) zip = zipfile.ZipFile(zip_path)
print(f"Loaded {zip.filename}")
data = zip.read("data.txt") data = zip.read("data.txt")
try: try:
content = json.loads(data) content = json.loads(data)
@ -99,7 +121,6 @@ class Chapter(BaseModel):
output = [] output = []
extras = [] extras = []
print(f"parsing {zip.filename}")
output.append(f"<h2>Глава {self.number}</h2>") output.append(f"<h2>Глава {self.number}</h2>")
for item in content["content"]: for item in content["content"]:
if item["type"] in {"paragraph", "heading"}: if item["type"] in {"paragraph", "heading"}:
@ -189,12 +210,27 @@ class Chapter(BaseModel):
new_content = new_content.replace(target_str, "") # type: ignore new_content = new_content.replace(target_str, "") # type: ignore
continue continue
md5 = hashlib.md5(resp).digest()
final_path = None
if md5 in IMG_HASHES:
similar_path = IMG_HASHES[md5]
img_bytes = (cache / similar_path).read_bytes()
if img_bytes == resp:
print("Found identical image in cache")
final_path = similar_path
else:
IMG_HASHES[md5] = strip_path
if final_path is None:
img = epub.EpubImage( img = epub.EpubImage(
file_name=strip_path, file_name=strip_path,
content=resp, content=resp,
) )
replaces.append(img) replaces.append(img)
newiimage = f'<img src="../{strip_path}" />' final_path = strip_path
newiimage = f'<img loading="lazy" src="../{final_path}" />'
print(target_str, newiimage)
new_content = new_content.replace(target_str, newiimage) # type: ignore new_content = new_content.replace(target_str, newiimage) # type: ignore
page.content = new_content page.content = new_content
@ -298,7 +334,7 @@ def main():
book.set_title(info.rusName) book.set_title(info.rusName)
book.add_metadata(namespace="DC", name="description", value=info.summary) book.add_metadata(namespace="DC", name="description", value=info.summary)
book.spine = [] book.spine = ["translators"]
if cover: if cover:
book.spine.insert(0, "cover") book.spine.insert(0, "cover")
book.set_cover(args.cover.name, cover) book.set_cover(args.cover.name, cover)
@ -314,8 +350,25 @@ def main():
print("Using default client") print("Using default client")
client = requests.Session() client = requests.Session()
translators_page = epub.EpubHtml(
file_name="translators.xhtml",
title="Translators",
lang="ru",
)
book.add_item(translators_page)
book.toc.append(translators_page)
book.spine.append(translators_page) # type: ignore
translators = set()
for chapter in chapters: for chapter in chapters:
chapter.add_to_book(book, args.input, args.fetch_images, client, args.cache) chapter.add_to_book(book, args.input, args.fetch_images, client, args.cache)
translators.update(chapter.get_translators(args.input))
translators_content = '<h2 align="center">Переводчики:</h2>\n<ul>\n'
for translator in sorted(translators):
translators_content += f"<li>{translator}</li>\n"
translators_content += "</ul>\n"
translators_page.content = translators_content
book.add_item(epub.EpubNcx()) book.add_item(epub.EpubNcx())