import io import os import re import secrets import tempfile from fileinput import filename from pathlib import Path import click from ebooklib import epub # type: ignore from PIL import Image from requests import HTTPError, auth, request def parse_issuu_url(url: str) -> tuple[str, str]: """Get Username and document_id from issuu url. returns: username: str document_id: str """ issuu_url_pattern = re.compile(r"https://issuu.com/([^\/]*)/docs/(.*)$") if mtc := issuu_url_pattern.match(url): username = mtc.group(1) document_id = mtc.group(2) else: raise ValueError("Issuu URL not Valid!") return username, document_id def create_working_dir() -> Path: """create a working directory. returns: Path() to a temporary directory. """ working_dir = tempfile.mkdtemp(prefix="issuu2epub_") return Path(working_dir) def get_page_urls(username: str, document_id: str) -> list[str]: """get a list of all pages.""" json_url = f"https://reader3.isu.pub/{username}/{document_id}/reader3_4.json" r = request("GET", json_url, timeout=(5, 5)) if not r.ok: raise HTTPError("Failed to download document information") document_data = r.json() return [ f"https://{page['imageUri']}" for page in document_data["document"]["pages"] ] def download_pages(page_urls: list[str], working_dir: Path) -> list[Path]: """download all page images and return file paths.""" page_paths = [] for url in page_urls: filename = url.split("/")[-1] path = Path(working_dir / filename) page_paths.append(path) with request("GET", url=url, stream=True, timeout=(10, 10)) as r: r.raise_for_status() with open(path, "wb") as f: for chunk in r.iter_content(chunk_size=8192): if chunk: f.write(chunk) return page_paths def convert_image(image_path: Path) -> io.BytesIO: """convert image and return bytes array.""" max_image_size = (1000, 1400) target_quality = 50 with Image.open(image_path.as_posix()) as img: if img.mode in ("RGBA", "P"): img = img.convert("RGB") img.thumbnail(max_image_size, Image.Resampling.LANCZOS) img_byte_arr = io.BytesIO() img.save(img_byte_arr, format="JPEG", optimize=True, quality=target_quality) img_byte_arr = img_byte_arr.getvalue() # type: ignore[assignment] return img_byte_arr # type: ignore[return-value] def generate_epub( pages: list[Path], output_file: Path, title: str, author: str ) -> None: """generate epub file.""" book = epub.EpubBook() book.set_identifier(secrets.token_urlsafe(10)) book.set_title(title=title) book.set_language("de") book.add_author(author=author) chapters = [] # Use first image as Cover title_page = epub.EpubHtml(title=title, file_name="title_page.xhtml", lang="de") cover_image = epub.EpubImage() cover_image.file_name = f"images/cover.jpg" cover_image.media_type = "image/jpeg" cover_image.content = convert_image(pages[0]) book.add_item(cover_image) title_page.content = f"""