"""Download an Issuu document page by page and bundle the images into an EPUB."""

import mimetypes
import re
import secrets
import shutil
import tempfile
from pathlib import Path, PurePosixPath
from urllib.parse import urlsplit

from ebooklib import epub  # type: ignore
from requests import HTTPError, auth, request

# Dots are escaped so only the real host matches; the document id stops at the
# first "/", "?" or "#" so deep links, query strings and fragments are ignored.
_ISSUU_URL_PATTERN = re.compile(
    r"https?://(?:www\.)?issuu\.com/([^/?#]+)/docs/([^/?#]+)(?:[/?#].*)?$"
)

# Defaults used when ``main`` is called without arguments.
_DEFAULT_URL = "https://issuu.com/bscyb1898/docs/yb_mag_nr._1_saison_2025_26"
_DEFAULT_OUTPUT = Path("/home/michael/Downloads/YBMag-2025-01.epub")
_DEFAULT_TITLE = "YB Mag 2025 01"
_DEFAULT_AUTHOR = "BSC YB"


def parse_issuu_url(url: str) -> tuple[str, str]:
    """Extract the username and document id from an Issuu document URL.

    Args:
        url: URL of the form ``https://issuu.com/<username>/docs/<document_id>``.
            Surrounding whitespace, a trailing path, a query string and a
            fragment are tolerated and ignored.

    Returns:
        A ``(username, document_id)`` tuple.

    Raises:
        ValueError: If ``url`` is not an Issuu document URL.
    """
    mtc = _ISSUU_URL_PATTERN.match(url.strip())
    if mtc is None:
        raise ValueError("Issuu URL not Valid!")
    return mtc.group(1), mtc.group(2)


def create_working_dir() -> Path:
    """Create a fresh temporary working directory.

    Returns:
        Path of the new directory (prefixed ``issuu2epub_``). The caller is
        responsible for removing it.
    """
    return Path(tempfile.mkdtemp(prefix="issuu2epub_"))


def get_page_urls(username: str, document_id: str) -> list[str]:
    """Fetch the reader metadata of a document and list its page image URLs.

    Args:
        username: Issuu account that owns the document.
        document_id: Document identifier as it appears in the document URL.

    Returns:
        One ``https://`` URL per page, in the order given by the metadata.

    Raises:
        HTTPError: If the metadata request does not succeed.
    """
    json_url = f"https://reader3.isu.pub/{username}/{document_id}/reader3_4.json"
    # The context manager releases the connection even when we raise below.
    with request("GET", json_url, timeout=(5, 5)) as r:
        if not r.ok:
            # Attach the response so callers can inspect the status code.
            raise HTTPError("Failed to download document information", response=r)
        document_data = r.json()
    return [
        f"https://{page['imageUri']}" for page in document_data["document"]["pages"]
    ]


def download_pages(page_urls: list[str], working_dir: Path) -> list[Path]:
    """Download every page image into ``working_dir``.

    Args:
        page_urls: Image URLs, one per page.
        working_dir: Existing directory that receives the downloaded files.

    Returns:
        Paths of the downloaded files, in the same order as ``page_urls``.

    Raises:
        HTTPError: If any image request returns an error status.
    """
    page_paths: list[Path] = []
    for index, url in enumerate(page_urls, start=1):
        # Use only the URL *path* so a query string never ends up in the file
        # name; fall back to a generated name if the path has no last segment.
        filename = PurePosixPath(urlsplit(url).path).name or f"page_{index}"
        path = working_dir / filename
        with request("GET", url=url, stream=True, timeout=(10, 10)) as r:
            r.raise_for_status()
            with open(path, "wb") as f:
                for chunk in r.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
        # Record the path only once the file has been written completely.
        page_paths.append(path)
    return page_paths


def generate_epub(
    pages: list[Path],
    output_file: Path,
    title: str,
    author: str,
    *,
    language: str = "de",
) -> None:
    """Build an EPUB with one chapter per page image and write it to disk.

    Args:
        pages: Paths of the page images, in reading order.
        output_file: Destination of the EPUB file; missing parent directories
            are created.
        title: Book title.
        author: Book author.
        language: Language code stored in the book and chapter metadata.
    """
    book = epub.EpubBook()
    book.set_identifier(secrets.token_urlsafe(10))
    book.set_title(title=title)
    book.set_language(language)
    book.add_author(author=author)

    chapters = []
    for i, page in enumerate(pages, start=1):
        page_title = f"Page {i}"

        # Store the image under a short, archive-relative and unique name
        # instead of the absolute path of the temporary file.
        image_name = f"images/page_{i:04d}{page.suffix}"
        media_type, _ = mimetypes.guess_type(page.name)

        image_item = epub.EpubImage()
        image_item.file_name = image_name
        # "image/png" was the previously hard-coded type; keep it as fallback
        # when the extension is unknown.
        image_item.media_type = media_type or "image/png"
        # Without the bytes the image entry in the EPUB would be empty.
        image_item.content = page.read_bytes()
        book.add_item(image_item)

        chapter = epub.EpubHtml(
            title=page_title, file_name=f"page_{i}.xhtml", lang=language
        )
        # The chapter has to reference the image, otherwise the page shows
        # nothing but text.
        chapter.content = (
            "<html><head>"
            f"<title>{page_title}</title>"
            "</head><body>"
            f'<div><img src="{image_name}" alt="{page_title}"/></div>'
            "</body></html>"
        )
        book.add_item(chapter)
        chapters.append(chapter)

    book.toc = chapters
    book.spine = ["nav"] + chapters
    book.add_item(epub.EpubNcx())
    book.add_item(epub.EpubNav())

    output_file.parent.mkdir(parents=True, exist_ok=True)
    epub.write_epub(output_file, book, {})
    print(f"EPUB erfolgreich erstellt: {output_file}")


def main(
    url: str = _DEFAULT_URL,
    output_file: Path = _DEFAULT_OUTPUT,
    title: str = _DEFAULT_TITLE,
    author: str = _DEFAULT_AUTHOR,
) -> None:
    """Download an Issuu document and convert it to an EPUB.

    Args:
        url: Issuu document URL.
        output_file: Destination of the generated EPUB.
        title: Book title.
        author: Book author.
    """
    # Validate the URL first so an invalid one does not leave a directory behind.
    username, document_id = parse_issuu_url(url)
    cwd = create_working_dir()
    try:
        urls = get_page_urls(username, document_id)
        pages = download_pages(urls, cwd)
        generate_epub(
            pages=pages,
            output_file=output_file,
            title=title,
            author=author,
        )
    finally:
        # The images are embedded in the EPUB by now; drop the temporary copies.
        shutil.rmtree(cwd, ignore_errors=True)


if __name__ == "__main__":
    main()