# Listing metadata (from the code viewer this file was copied from): 218 lines, 5.8 KiB, Python.
import html
import io
import os
import re
import secrets
import shutil
import tempfile
from fileinput import filename
from pathlib import Path

import click
from ebooklib import epub  # type: ignore
from PIL import Image
from requests import HTTPError, auth, request


def parse_issuu_url(url: str) -> tuple[str, str]:
    """Extract the username and document id from an Issuu document URL.

    Accepted URLs look like ``https://issuu.com/<username>/docs/<document_id>``.
    An optional ``www.`` prefix, a trailing slash, a trailing page segment,
    a query string and a fragment are tolerated and are not part of the
    returned document id.

    Args:
        url: The Issuu document URL; surrounding whitespace is ignored.

    Returns:
        A ``(username, document_id)`` tuple.

    Raises:
        ValueError: If ``url`` is not an Issuu document URL.
    """
    # The dots are escaped so that only the real host matches, and both
    # captures stop at "/", "?" or "#" so that tracking parameters such as
    # "?fr=..." never leak into the document id.
    issuu_url_pattern = re.compile(
        r"https?://(?:www\.)?issuu\.com/([^/?#]+)/docs/([^/?#]+)"
    )

    mtc = issuu_url_pattern.match(url.strip())
    if mtc is None:
        raise ValueError("Issuu URL not Valid!")

    return mtc.group(1), mtc.group(2)


def create_working_dir() -> Path:
    """Make a fresh scratch directory for downloaded files.

    returns:
        Path of the newly created temporary directory; its name starts
        with ``issuu2epub_``.
    """
    return Path(tempfile.mkdtemp(prefix="issuu2epub_"))


def get_page_urls(username: str, document_id: str) -> list[str]:
    """Fetch the reader metadata of a document and list its page image URLs.

    The metadata is requested from ``reader3.isu.pub``; every entry under
    ``document -> pages`` contributes its ``imageUri``, prefixed with
    ``https://``.

    Raises:
        HTTPError: If the metadata request does not succeed.
    """
    response = request(
        "GET",
        f"https://reader3.isu.pub/{username}/{document_id}/reader3_4.json",
        timeout=(5, 5),
    )
    if response.ok:
        page_entries = response.json()["document"]["pages"]
        return [f"https://{entry['imageUri']}" for entry in page_entries]

    raise HTTPError("Failed to download document information")


def download_pages(page_urls: list[str], working_dir: Path) -> list[Path]:
    """Stream every page image into ``working_dir``.

    Each file is named after the last path segment of its URL. The returned
    list holds the local paths in the same order as ``page_urls``. A failed
    HTTP request raises through ``raise_for_status``.
    """
    saved: list[Path] = []

    for page_url in page_urls:
        target = working_dir / page_url.rsplit("/", 1)[-1]
        saved.append(target)

        with request("GET", url=page_url, stream=True, timeout=(10, 10)) as response:
            response.raise_for_status()

            with target.open("wb") as sink:
                # Empty chunks are skipped, exactly like an `if chunk:` guard.
                for block in filter(None, response.iter_content(chunk_size=8192)):
                    sink.write(block)

    return saved


def convert_image(image_path: Path) -> bytes:
    """Shrink an image and re-encode it as a JPEG.

    The image is scaled down (aspect ratio preserved) to fit within
    1000x1400 pixels and saved as an optimized JPEG at quality 50.

    Args:
        image_path: Location of the source image on disk.

    Returns:
        The encoded JPEG data as ``bytes``.
    """
    max_image_size = (1000, 1400)
    target_quality = 50
    # Modes Pillow's JPEG encoder can write directly. Anything else
    # (e.g. RGBA, P, LA, I, F) is converted to RGB first; otherwise
    # ``save`` fails with "cannot write mode ... as JPEG".
    jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")

    with Image.open(image_path) as img:
        if img.mode not in jpeg_modes:
            img = img.convert("RGB")

        img.thumbnail(max_image_size, Image.Resampling.LANCZOS)

        buffer = io.BytesIO()
        img.save(buffer, format="JPEG", optimize=True, quality=target_quality)

    return buffer.getvalue()


def _image_page_html(page_title: str, image_src: str, alt_text: str) -> str:
    """Return the HTML of a page that shows one centered, scaled image."""
    # All inserted values are escaped so that characters such as "&", "<"
    # or quotes in a user-supplied title cannot break the markup.
    return f"""
<html>
<head>
<title>{html.escape(page_title)}</title>
<style>
body {{
    margin: 0;
    padding: 20px;
    text-align: center;
}}
img {{
    max-width: 100%;
    height: auto;
    max-height: 90vh;
}}
</style>
</head>
<body>
<img src="{html.escape(image_src)}" alt="{html.escape(alt_text)}"/>
</body>
</html>
"""


def _add_jpeg(book: "epub.EpubBook", file_name: str, source: Path) -> "epub.EpubImage":
    """Convert ``source`` with ``convert_image`` and register it in ``book``."""
    image_item = epub.EpubImage()
    image_item.file_name = file_name
    image_item.media_type = "image/jpeg"
    image_item.content = convert_image(source)
    book.add_item(image_item)
    return image_item


def generate_epub(
    pages: list[Path],
    output_file: Path,
    title: str,
    author: str,
    language: str = "de",
) -> None:
    """Bundle page images into an EPUB with one image per chapter.

    The first image becomes the title page (``title_page.xhtml``); every
    following image gets its own chapter named ``Page 1``, ``Page 2``, ...
    All images are re-encoded through ``convert_image``.

    Args:
        pages: Paths of the page images, in reading order.
        output_file: Destination of the EPUB file.
        title: Book title; also used as the title of the title page.
        author: Book author.
        language: Language code set on the book and on every chapter.

    Raises:
        ValueError: If ``pages`` is empty.
    """
    if not pages:
        raise ValueError("Cannot generate an EPUB without any pages")

    book = epub.EpubBook()
    book.set_identifier(secrets.token_urlsafe(10))
    book.set_title(title=title)
    book.set_language(language)
    book.add_author(author=author)

    # Use the first image as the cover / title page.
    cover_image = _add_jpeg(book, "images/cover.jpg", pages[0])
    title_page = epub.EpubHtml(
        title=title, file_name="title_page.xhtml", lang=language
    )
    title_page.content = _image_page_html(title, cover_image.file_name, "Cover")
    book.add_item(title_page)
    chapters = [title_page]

    # Remaining images: numbering starts at 1 for the image after the cover.
    for i, page in enumerate(pages[1:], start=1):
        page_title = f"Page {i}"
        image_item = _add_jpeg(book, f"images/page_{i:03d}.jpg", page)

        chapter = epub.EpubHtml(
            title=page_title, file_name=f"page_{i}.xhtml", lang=language
        )
        chapter.content = _image_page_html(
            page_title, image_item.file_name, page_title
        )
        book.add_item(chapter)
        chapters.append(chapter)

    # The spine refers to "nav", so the navigation documents have to be part
    # of the book; the table of contents lists every chapter.
    book.toc = chapters
    book.add_item(epub.EpubNcx())
    book.add_item(epub.EpubNav())
    book.spine = ["nav", *chapters]

    epub.write_epub(output_file, book, {})

    print(f"✅ Kindle-optimiertes EPUB erstellt: {output_file}")


@click.command()
@click.option("--url", prompt="Issuu URL", help="Issuu URL to convert to EPUB")
@click.option("--title", prompt="Document Title", help="Document Title")
@click.option("--author", prompt="Document Author", help="Document Author")
@click.option(
    "--output",
    prompt="EPUB Output Filename",
    help="EPUB Output File",
)
def main(url: str, title: str, author: str, output: str) -> None:
    """Download an Issuu document and convert it into an EPUB file."""
    # NOTE: click shows the docstring above as the command's --help text,
    # so it is kept short and user-facing.

    # Validate the URL before anything is created on disk, and report a bad
    # URL as a normal usage error instead of a traceback.
    try:
        username, document_id = parse_issuu_url(url=url)
    except ValueError as err:
        raise click.BadParameter(str(err), param_hint="--url") from err

    working_dir = create_working_dir()
    try:
        urls = get_page_urls(username, document_id)
        pages = download_pages(urls, working_dir)

        generate_epub(
            pages=pages,
            output_file=Path(output),
            title=title,
            author=author,
        )
    finally:
        # The downloaded images are only needed while the EPUB is built;
        # remove them whether or not the conversion succeeded.
        shutil.rmtree(working_dir, ignore_errors=True)


# Run the command-line interface only when this file is executed as a script.
if __name__ == "__main__":
    main()