Added translators mention.
This commit is contained in:
59
main.py
59
main.py
@ -10,6 +10,7 @@ import json
|
|||||||
import re
|
import re
|
||||||
from requests_tor import RequestsTor
|
from requests_tor import RequestsTor
|
||||||
import requests
|
import requests
|
||||||
|
import hashlib
|
||||||
|
|
||||||
IMAGE_REMOVE_REGEX = re.compile(r"""<img\s+.*src\s*=\s*\"(?P<src>https?.*)\".*/>""")
|
IMAGE_REMOVE_REGEX = re.compile(r"""<img\s+.*src\s*=\s*\"(?P<src>https?.*)\".*/>""")
|
||||||
|
|
||||||
@ -28,9 +29,17 @@ class BookMedia(BaseModel):
|
|||||||
summary: str
|
summary: str
|
||||||
|
|
||||||
|
|
||||||
|
class BranchTeam(BaseModel):
    """A team entry listed under a chapter branch.

    Only the team's name is modelled; it is what ends up in the list of
    translators collected per chapter.
    """

    # Team name as it appears in the source data.
    name: str
|
||||||
|
|
||||||
|
|
||||||
class Branch(BaseModel):
    """One branch of a chapter, together with the teams credited for it."""

    # Identifier of this record; not read by the code visible here.
    id: int
    # Branch identifier; used as the ``-b{branchId}`` suffix of the
    # per-branch chapter archive name.
    branchId: int
    # Teams credited for this branch.
    teams: list[BranchTeam]
|
||||||
|
|
||||||
|
|
||||||
|
IMG_HASHES: dict[bytes, str] = {}
|
||||||
|
|
||||||
|
|
||||||
def process_text(text_obj: dict):
|
def process_text(text_obj: dict):
|
||||||
@ -53,6 +62,18 @@ class Chapter(BaseModel):
|
|||||||
branches: list[Branch]
|
branches: list[Branch]
|
||||||
withBranches: bool
|
withBranches: bool
|
||||||
|
|
||||||
|
def get_translators(self, base_dir: Path) -> list[str]:
    """Return the team names of the branch this chapter was taken from.

    When the chapter has exactly one branch, that branch's teams are
    returned without looking at the filesystem. Otherwise the branches are
    tried in order and the first one whose archive
    ``v{volume}-n{number}-{id}-b{branchId}.zip`` exists in ``base_dir``
    supplies the names.

    Args:
        base_dir: Directory that holds the downloaded chapter archives.

    Returns:
        The names of the selected branch's teams, or an empty list when no
        branch archive is found.
    """
    if len(self.branches) == 1:
        selected = self.branches[0]
    else:
        # ``next`` stops at the first existing archive, so later branches
        # are never probed on disk once a match is found.
        selected = next(
            (
                branch
                for branch in self.branches
                if (
                    base_dir
                    / f"v{self.volume}-n{self.number}-{self.id}-b{branch.branchId}.zip"
                ).exists()
            ),
            None,
        )

    if selected is None:
        return []
    return [team.name for team in selected.teams]
|
||||||
|
|
||||||
def get_zip_path(self, base_dir: Path) -> Path | None:
|
def get_zip_path(self, base_dir: Path) -> Path | None:
|
||||||
nobranch_zip = base_dir / f"v{self.volume}-n{self.number}-{self.id}.zip"
|
nobranch_zip = base_dir / f"v{self.volume}-n{self.number}-{self.id}.zip"
|
||||||
if nobranch_zip.exists():
|
if nobranch_zip.exists():
|
||||||
@ -83,6 +104,7 @@ class Chapter(BaseModel):
|
|||||||
if zip_path is None:
|
if zip_path is None:
|
||||||
raise FileNotFoundError(f"Chapter for `{self}` not found")
|
raise FileNotFoundError(f"Chapter for `{self}` not found")
|
||||||
zip = zipfile.ZipFile(zip_path)
|
zip = zipfile.ZipFile(zip_path)
|
||||||
|
print(f"Loaded {zip.filename}")
|
||||||
data = zip.read("data.txt")
|
data = zip.read("data.txt")
|
||||||
try:
|
try:
|
||||||
content = json.loads(data)
|
content = json.loads(data)
|
||||||
@ -99,7 +121,6 @@ class Chapter(BaseModel):
|
|||||||
output = []
|
output = []
|
||||||
extras = []
|
extras = []
|
||||||
|
|
||||||
print(f"parsing {zip.filename}")
|
|
||||||
output.append(f"<h2>Глава {self.number}</h2>")
|
output.append(f"<h2>Глава {self.number}</h2>")
|
||||||
for item in content["content"]:
|
for item in content["content"]:
|
||||||
if item["type"] in {"paragraph", "heading"}:
|
if item["type"] in {"paragraph", "heading"}:
|
||||||
@ -189,12 +210,27 @@ class Chapter(BaseModel):
|
|||||||
new_content = new_content.replace(target_str, "") # type: ignore
|
new_content = new_content.replace(target_str, "") # type: ignore
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
md5 = hashlib.md5(resp).digest()
|
||||||
|
final_path = None
|
||||||
|
if md5 in IMG_HASHES:
|
||||||
|
similar_path = IMG_HASHES[md5]
|
||||||
|
img_bytes = (cache / similar_path).read_bytes()
|
||||||
|
if img_bytes == resp:
|
||||||
|
print("Found identical image in cache")
|
||||||
|
final_path = similar_path
|
||||||
|
else:
|
||||||
|
IMG_HASHES[md5] = strip_path
|
||||||
|
|
||||||
|
if final_path is None:
|
||||||
img = epub.EpubImage(
|
img = epub.EpubImage(
|
||||||
file_name=strip_path,
|
file_name=strip_path,
|
||||||
content=resp,
|
content=resp,
|
||||||
)
|
)
|
||||||
replaces.append(img)
|
replaces.append(img)
|
||||||
newiimage = f'<img src="../{strip_path}" />'
|
final_path = strip_path
|
||||||
|
|
||||||
|
newiimage = f'<img loading="lazy" src="../{final_path}" />'
|
||||||
|
print(target_str, newiimage)
|
||||||
new_content = new_content.replace(target_str, newiimage) # type: ignore
|
new_content = new_content.replace(target_str, newiimage) # type: ignore
|
||||||
|
|
||||||
page.content = new_content
|
page.content = new_content
|
||||||
@ -298,7 +334,7 @@ def main():
|
|||||||
book.set_title(info.rusName)
|
book.set_title(info.rusName)
|
||||||
book.add_metadata(namespace="DC", name="description", value=info.summary)
|
book.add_metadata(namespace="DC", name="description", value=info.summary)
|
||||||
|
|
||||||
book.spine = []
|
book.spine = ["translators"]
|
||||||
if cover:
|
if cover:
|
||||||
book.spine.insert(0, "cover")
|
book.spine.insert(0, "cover")
|
||||||
book.set_cover(args.cover.name, cover)
|
book.set_cover(args.cover.name, cover)
|
||||||
@ -314,8 +350,25 @@ def main():
|
|||||||
print("Using default client")
|
print("Using default client")
|
||||||
client = requests.Session()
|
client = requests.Session()
|
||||||
|
|
||||||
|
translators_page = epub.EpubHtml(
|
||||||
|
file_name="translators.xhtml",
|
||||||
|
title="Translators",
|
||||||
|
lang="ru",
|
||||||
|
)
|
||||||
|
book.add_item(translators_page)
|
||||||
|
book.toc.append(translators_page)
|
||||||
|
book.spine.append(translators_page) # type: ignore
|
||||||
|
|
||||||
|
translators = set()
|
||||||
for chapter in chapters:
|
for chapter in chapters:
|
||||||
chapter.add_to_book(book, args.input, args.fetch_images, client, args.cache)
|
chapter.add_to_book(book, args.input, args.fetch_images, client, args.cache)
|
||||||
|
translators.update(chapter.get_translators(args.input))
|
||||||
|
|
||||||
|
translators_content = '<h2 align="center">Переводчики:</h2>\n<ul>\n'
|
||||||
|
for translator in sorted(translators):
|
||||||
|
translators_content += f"<li>{translator}</li>\n"
|
||||||
|
translators_content += "</ul>\n"
|
||||||
|
translators_page.content = translators_content
|
||||||
|
|
||||||
book.add_item(epub.EpubNcx())
|
book.add_item(epub.EpubNcx())
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user