"""Download all page images of an issuu.com document into a temp directory."""

import re
import tempfile
from pathlib import Path

from requests import HTTPError, request

# Escaped dots so "issuu.com" matches literally (unescaped "." matched any
# character); "+" rejects empty username/document-id segments.
_ISSUU_URL_PATTERN = re.compile(r"https://issuu\.com/([^/]+)/docs/(.+)$")


def parse_issuu_url(url: str) -> tuple[str, str]:
    """Get username and document_id from an issuu URL.

    Args:
        url: URL of the form ``https://issuu.com/<username>/docs/<document_id>``.

    Returns:
        A ``(username, document_id)`` tuple.

    Raises:
        ValueError: If the URL does not match the expected issuu shape.
    """
    if mtc := _ISSUU_URL_PATTERN.match(url):
        return mtc.group(1), mtc.group(2)
    raise ValueError("Issuu URL not Valid!")


def create_working_dir() -> Path:
    """Create a working directory.

    Returns:
        Path to a fresh temporary directory (caller is responsible for cleanup).
    """
    return Path(tempfile.mkdtemp(prefix="issuu2epub_"))


def get_page_urls(username: str, document_id: str) -> list[str]:
    """Fetch the document's reader JSON and return the URLs of all page images.

    Args:
        username: Issuu account name.
        document_id: Issuu document identifier.

    Returns:
        One ``https://...`` image URL per page, in page order.

    Raises:
        HTTPError: If the reader JSON cannot be downloaded.
    """
    json_url = f"https://reader3.isu.pub/{username}/{document_id}/reader3_4.json"
    r = request("GET", json_url, timeout=(5, 5))
    # raise_for_status() raises HTTPError with the response attached, unlike a
    # hand-built HTTPError, and matches the error style of download_pages().
    r.raise_for_status()
    document_data = r.json()
    # imageUri values come without a scheme, e.g. "image.issuu.com/...".
    return [
        f"https://{page['imageUri']}"
        for page in document_data["document"]["pages"]
    ]


def download_pages(page_urls: list[str], working_dir: Path) -> list[Path]:
    """Download all page images and return the local file paths.

    Args:
        page_urls: Image URLs as returned by :func:`get_page_urls`.
        working_dir: Directory the images are written into.

    Returns:
        Paths of the downloaded files, in the same order as ``page_urls``.

    Raises:
        HTTPError: If any image download fails.
    """
    page_paths: list[Path] = []
    for url in page_urls:
        filename = url.split("/")[-1]
        path = working_dir / filename  # working_dir is already a Path
        page_paths.append(path)
        # Stream to disk in chunks so large pages are never held in memory.
        with request("GET", url=url, stream=True, timeout=(10, 10)) as r:
            r.raise_for_status()
            with open(path, "wb") as f:
                for chunk in r.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
    return page_paths


def main() -> None:
    """Download every page of a sample issuu document and print the paths."""
    cwd = create_working_dir()
    username, document_id = parse_issuu_url(
        "https://issuu.com/bscyb1898/docs/yb_mag_nr._1_saison_2025_26"
    )
    urls = get_page_urls(username, document_id)
    print(download_pages(urls, cwd))


if __name__ == "__main__":
    main()