"""Download the page images of an Issuu document and bundle them into an EPUB."""

import html
import io
import os
import re
import secrets
import shutil
import tempfile
from fileinput import filename
from pathlib import Path
from urllib.parse import urlsplit

import click
from ebooklib import epub  # type: ignore
from PIL import Image
from requests import HTTPError, auth, request

# Matches https://issuu.com/<username>/docs/<document_id>, optionally followed
# by a trailing path segment, query string or fragment (all of which are ignored).
_ISSUU_URL_PATTERN = re.compile(
    r"https://(?:www\.)?issuu\.com/([^/?#]+)/docs/([^/?#]+)(?:[/?#].*)?$"
)


def parse_issuu_url(url: str) -> tuple[str, str]:
    """Extract the username and document id from an Issuu document URL.

    Args:
        url: URL of the form ``https://issuu.com/<username>/docs/<document_id>``.
            Surrounding whitespace, a ``www.`` prefix, and anything after the
            document id (extra path segments, query string, fragment) are
            tolerated and ignored.

    Returns:
        A ``(username, document_id)`` tuple.

    Raises:
        ValueError: If ``url`` is not a recognizable Issuu document URL.
    """
    match = _ISSUU_URL_PATTERN.match(url.strip())
    if match is None:
        raise ValueError("Issuu URL not Valid!")
    return match.group(1), match.group(2)


def create_working_dir() -> Path:
    """Create a fresh temporary working directory.

    The caller owns the directory and is responsible for removing it.

    Returns:
        Path to the newly created directory (prefixed ``issuu2epub_``).
    """
    return Path(tempfile.mkdtemp(prefix="issuu2epub_"))


def get_page_urls(username: str, document_id: str) -> list[str]:
    """Fetch the reader metadata of a document and list its page image URLs.

    Args:
        username: Issuu account name that owns the document.
        document_id: Document identifier as it appears in the Issuu URL.

    Returns:
        One ``https://`` image URL per page, in the order given by the metadata.

    Raises:
        HTTPError: If the metadata request does not succeed.
    """
    json_url = f"https://reader3.isu.pub/{username}/{document_id}/reader3_4.json"
    with request("GET", json_url, timeout=(5, 5)) as response:
        if not response.ok:
            raise HTTPError(
                "Failed to download document information", response=response
            )
        document_data = response.json()
    return [
        f"https://{page['imageUri']}" for page in document_data["document"]["pages"]
    ]


def download_pages(page_urls: list[str], working_dir: Path) -> list[Path]:
    """Download every page image into ``working_dir``.

    Args:
        page_urls: Image URLs in page order.
        working_dir: Existing directory that receives the downloaded files.

    Returns:
        Paths of the downloaded files, in the same order as ``page_urls``.

    Raises:
        HTTPError: If any image request returns an error status.
    """
    page_paths: list[Path] = []
    for index, url in enumerate(page_urls, start=1):
        # Name the file after the URL *path* only (no query string) and prefix
        # it with the page index so identical basenames cannot overwrite each
        # other.
        remote_name = Path(urlsplit(url).path).name or "page"
        path = working_dir / f"{index:04d}_{remote_name}"
        with request("GET", url=url, stream=True, timeout=(10, 10)) as response:
            response.raise_for_status()
            with open(path, "wb") as target:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        target.write(chunk)
        page_paths.append(path)
    return page_paths


def convert_image(image_path: Path) -> bytes:
    """Re-encode an image as a downscaled JPEG.

    The image is shrunk (never enlarged) to fit within 1000x1400 pixels and
    saved as JPEG with quality 50.

    Args:
        image_path: Path of the source image file.

    Returns:
        The encoded JPEG data.
    """
    max_image_size = (1000, 1400)
    target_quality = 50
    with Image.open(image_path) as source:
        # Normalise every mode other than plain RGB / greyscale (RGBA, P, LA,
        # CMYK, ...) to RGB so the JPEG encoder always receives a mode it can
        # write.
        page = source if source.mode in ("RGB", "L") else source.convert("RGB")
        page.thumbnail(max_image_size, Image.Resampling.LANCZOS)
        buffer = io.BytesIO()
        page.save(buffer, format="JPEG", optimize=True, quality=target_quality)
    return buffer.getvalue()


def _image_page_markup(page_title: str, image_file: str, alt_text: str) -> str:
    """Return a minimal XHTML document that displays a single image."""
    return (
        "<html><head>"
        f"<title>{html.escape(page_title)}</title>"
        "</head><body>"
        f'<img src="{html.escape(image_file)}" alt="{html.escape(alt_text)}"/>'
        "</body></html>"
    )


def _add_image_page(
    book: "epub.EpubBook",
    image_path: Path,
    image_file: str,
    page_file: str,
    page_title: str,
    alt_text: str,
    language: str,
) -> "epub.EpubHtml":
    """Add one converted image plus the XHTML page showing it; return the page."""
    image_item = epub.EpubImage()
    image_item.file_name = image_file
    image_item.media_type = "image/jpeg"
    image_item.content = convert_image(image_path)
    book.add_item(image_item)

    chapter = epub.EpubHtml(title=page_title, file_name=page_file, lang=language)
    chapter.content = _image_page_markup(page_title, image_file, alt_text)
    book.add_item(chapter)
    return chapter


def generate_epub(
    pages: list[Path],
    output_file: Path,
    title: str,
    author: str,
    language: str = "de",
) -> None:
    """Build an EPUB in which every page is one full-page image.

    The first entry of ``pages`` becomes the cover/title page; every further
    entry becomes its own chapter named ``Page <n>``.

    Args:
        pages: Paths of the page images, in reading order. Must not be empty.
        output_file: Destination path of the EPUB file.
        title: Book title stored in the metadata and used for the title page.
        author: Author name stored in the metadata.
        language: Language code stored in the metadata and on each page.

    Raises:
        ValueError: If ``pages`` is empty.
    """
    if not pages:
        raise ValueError("Cannot generate an EPUB without any pages.")

    book = epub.EpubBook()
    book.set_identifier(secrets.token_urlsafe(10))
    book.set_title(title=title)
    book.set_language(language)
    book.add_author(author=author)

    # Use the first image as the cover / title page.
    chapters = [
        _add_image_page(
            book,
            image_path=pages[0],
            image_file="images/cover.jpg",
            page_file="title_page.xhtml",
            page_title=title,
            alt_text="Cover",
            language=language,
        )
    ]

    # Add the remaining pages, numbered from 1.
    for i, page in enumerate(pages[1:], start=1):
        chapters.append(
            _add_image_page(
                book,
                image_path=page,
                image_file=f"images/page_{i:03d}.jpg",
                page_file=f"page_{i}.xhtml",
                page_title=f"Page {i}",
                alt_text=f"Page {i}",
                language=language,
            )
        )

    # The spine refers to "nav", so the navigation documents have to exist in
    # the book; without them the reference is dangling.
    book.toc = tuple(chapters)
    book.add_item(epub.EpubNcx())
    book.add_item(epub.EpubNav())
    book.spine = ["nav", *chapters]

    epub.write_epub(output_file, book, {})
    print(f"✅ Kindle-optimiertes EPUB erstellt: {output_file}")


@click.command()
@click.option("--url", prompt="Issuu URL", help="Issuu URL to convert to EPUB")
@click.option("--title", prompt="Document Title", help="Document Title")
@click.option("--author", prompt="Document Author", help="Document Author")
@click.option(
    "--output",
    prompt="EPUB Output Filename",
    help="EPUB Output File",
)
def main(url: str, title: str, author: str, output: str) -> None:
    """Convert an Issuu document into an EPUB file."""
    # Validate the URL before touching the file system or the network.
    try:
        username, document_id = parse_issuu_url(url=url)
    except ValueError as exc:
        raise click.BadParameter(str(exc), param_hint="--url") from exc

    cwd = create_working_dir()
    try:
        urls = get_page_urls(username, document_id)
        if not urls:
            raise click.ClickException("The document does not contain any pages.")
        pages = download_pages(urls, cwd)
        generate_epub(
            pages=pages,
            output_file=Path(output),
            title=title,
            author=author,
        )
    finally:
        # The downloaded images are only needed while the EPUB is being built.
        shutil.rmtree(cwd, ignore_errors=True)


if __name__ == "__main__":
    main()