Compare commits

...

4 Commits

Author SHA1 Message Date
Michael Lehmann
ca76bdd564 Add generate epub function. 2025-10-05 15:58:46 +02:00
Michael Lehmann
52d03aa633 Add types-requests. 2025-10-05 14:45:34 +02:00
Michael Lehmann
2435bd7888 Work in progress. 2025-10-05 14:45:24 +02:00
Michael Lehmann
71e6c5dd48 Add Pip. 2025-10-05 14:43:04 +02:00
2 changed files with 148 additions and 1 deletions

View File

@@ -10,6 +10,8 @@
pkgs.python313Packages.click pkgs.python313Packages.click
pkgs.python313Packages.ebooklib pkgs.python313Packages.ebooklib
pkgs.python313Packages.requests pkgs.python313Packages.requests
pkgs.python313Packages.pip
pkgs.python313Packages.types-requests
]; ];
git-hooks = { git-hooks = {

147
main.py
View File

@@ -1,6 +1,151 @@
import mimetypes
import re
import secrets
import shutil
import tempfile
from pathlib import Path

from ebooklib import epub  # type: ignore
from requests import HTTPError, auth, request
def parse_issuu_url(url: str) -> tuple[str, str]:
    """Extract the username and document id from an Issuu document URL.

    Only the two path segments around ``/docs/`` are captured. Anything that
    follows the document id (a trailing slash, a page-number segment, a query
    string such as ``?fr=...`` or a fragment) is ignored, so links copied
    straight from the browser work as well.

    Args:
        url: URL of the form
            ``https://issuu.com/<username>/docs/<document_id>``.

    Returns:
        A ``(username, document_id)`` tuple.

    Raises:
        ValueError: If ``url`` is not an Issuu document URL.
    """
    # The dot in the host is escaped so it only matches a literal ".", and
    # both captures exclude "/", "?" and "#" so they can never be empty or
    # swallow trailing URL parts.
    issuu_url_pattern = re.compile(
        r"https://issuu\.com/([^/?#]+)/docs/([^/?#]+)(?:[/?#].*)?$"
    )
    if mtc := issuu_url_pattern.match(url.strip()):
        return mtc.group(1), mtc.group(2)
    raise ValueError("Issuu URL not Valid!")
def create_working_dir() -> Path:
    """Create a fresh temporary directory for intermediate files.

    The directory is created with ``tempfile.mkdtemp`` and its name starts
    with ``issuu2epub_``.

    Returns:
        Path of the newly created directory.
    """
    return Path(tempfile.mkdtemp(prefix="issuu2epub_"))
def get_page_urls(username: str, document_id: str) -> list[str]:
    """Return the image URLs of all pages of an Issuu document.

    Downloads the document's ``reader3_4.json`` description and builds one
    ``https://`` URL per entry in ``document.pages`` from its ``imageUri``.

    Args:
        username: Issuu account name that owns the document.
        document_id: Document identifier as it appears in the Issuu URL.

    Returns:
        Page image URLs in the order they are listed in the JSON document.

    Raises:
        HTTPError: If the server answers with a non-success status. The
            failing response is attached to the exception.
    """
    json_url = f"https://reader3.isu.pub/{username}/{document_id}/reader3_4.json"
    # Context manager releases the connection even when an error is raised.
    with request("GET", json_url, timeout=(5, 5)) as r:
        if not r.ok:
            # Keep the status code and response so callers can tell a
            # missing document (404) from a server-side failure.
            raise HTTPError(
                f"Failed to download document information (HTTP {r.status_code})",
                response=r,
            )
        document_data = r.json()
    return [
        f"https://{page['imageUri']}" for page in document_data["document"]["pages"]
    ]
def download_pages(page_urls: list[str], working_dir: Path) -> list[Path]:
    """Fetch every page image into ``working_dir``.

    Each file is named after the last ``/``-separated segment of its URL and
    is streamed to disk in chunks of 8192 bytes.

    Args:
        page_urls: URLs of the page images to download.
        working_dir: Directory the images are written to.

    Returns:
        The target file paths, in the same order as ``page_urls``.

    Raises:
        HTTPError: Raised by ``raise_for_status`` when a download fails.
    """
    targets = []
    for page_url in page_urls:
        target = working_dir / page_url.rsplit("/", 1)[-1]
        targets.append(target)
        with request("GET", url=page_url, stream=True, timeout=(10, 10)) as response:
            response.raise_for_status()
            with open(target, "wb") as out:
                for block in response.iter_content(chunk_size=8192):
                    # Skip empty chunks instead of writing them.
                    if not block:
                        continue
                    out.write(block)
    return targets
def generate_epub(
    pages: list[Path],
    output_file: Path,
    title: str,
    author: str,
    language: str = "de",
) -> None:
    """Build an EPUB in which every page image becomes its own chapter.

    Args:
        pages: Image files to embed, in reading order.
        output_file: Destination path of the EPUB file.
        title: Book title stored in the EPUB metadata.
        author: Author stored in the EPUB metadata.
        language: Language code for the book and its chapters. Defaults to
            ``"de"``.

    Raises:
        OSError: If one of the page images cannot be read.
    """
    book = epub.EpubBook()
    book.set_identifier(secrets.token_urlsafe(10))
    book.set_title(title=title)
    book.set_language(language)
    book.add_author(author=author)

    chapters = []
    for i, page in enumerate(pages, start=1):
        page_title = f"Page {i}"

        image_item = epub.EpubImage()
        # Use a relative name inside the archive; the absolute path of the
        # temporary download directory must not leak into the EPUB.
        image_item.file_name = f"images/page_{i}{page.suffix}"
        # Derive the media type from the file extension instead of assuming
        # PNG; fall back to PNG when the extension is unknown.
        image_item.media_type = mimetypes.guess_type(page.name)[0] or "image/png"
        # Without content the image entry in the EPUB would be empty.
        image_item.content = page.read_bytes()
        book.add_item(image_item)

        chapter = epub.EpubHtml(
            title=page_title, file_name=f"page_{i}.xhtml", lang=language
        )
        chapter.content = f"""
<html>
<head>
<title>{page_title}</title>
<style>
body {{
margin: 0;
padding: 20px;
text-align: center;
}}
img {{
max-width: 100%;
height: auto;
max-height: 90vh;
}}
</style>
</head>
<body>
<img src="{image_item.file_name}" alt="Page {i}"/>
</body>
</html>
"""
        book.add_item(chapter)
        chapters.append(chapter)

    book.toc = chapters
    book.spine = ["nav"] + chapters
    book.add_item(epub.EpubNcx())
    book.add_item(epub.EpubNav())
    epub.write_epub(output_file, book, {})
    print(f"EPUB erfolgreich erstellt: {output_file}")
def main() -> None:
    """Download one hard-coded Issuu document and write it as an EPUB.

    The page images are downloaded into a temporary working directory, which
    is removed again once the EPUB has been written or an error occurred.
    """
    cwd = create_working_dir()
    try:
        username, document_id = parse_issuu_url(
            "https://issuu.com/bscyb1898/docs/yb_mag_nr._1_saison_2025_26"
        )
        urls = get_page_urls(username, document_id)
        pages = download_pages(urls, cwd)
        generate_epub(
            pages=pages,
            output_file=Path("/home/michael/Downloads/YBMag-2025-01.epub"),
            title="YB Mag 2025 01",
            author="BSC YB",
        )
    finally:
        # The downloaded images are only intermediate files; do not leave
        # one temporary directory behind per run.
        shutil.rmtree(cwd, ignore_errors=True)
if __name__ == "__main__": if __name__ == "__main__":