Compare commits
4 Commits
827a2ae354
...
ca76bdd564
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ca76bdd564
|
||
|
|
52d03aa633
|
||
|
|
2435bd7888
|
||
|
|
71e6c5dd48
|
@@ -10,6 +10,8 @@
|
||||
pkgs.python313Packages.click
|
||||
pkgs.python313Packages.ebooklib
|
||||
pkgs.python313Packages.requests
|
||||
pkgs.python313Packages.pip
|
||||
pkgs.python313Packages.types-requests
|
||||
];
|
||||
|
||||
git-hooks = {
|
||||
|
||||
147
main.py
147
main.py
@@ -1,6 +1,151 @@
|
||||
import re
|
||||
import secrets
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
from ebooklib import epub # type: ignore
|
||||
from requests import HTTPError, auth, request
|
||||
|
||||
|
||||
def parse_issuu_url(url: str) -> tuple[str, str]:
    """Extract the username and document id from an issuu document URL.

    Expected shape: ``https://issuu.com/<username>/docs/<document_id>``.
    An optional ``www.`` host prefix is accepted, and anything following the
    document id (trailing slash, extra path segments, query string, fragment)
    is ignored so that it does not leak into the returned document id.

    Args:
        url: The issuu document URL; surrounding whitespace is ignored.

    Returns:
        A ``(username, document_id)`` tuple.

    Raises:
        ValueError: If ``url`` is not an issuu document URL.
    """
    # Dots are escaped so only the literal host matches, and both captured
    # components must be non-empty and free of URL delimiters.
    issuu_url_pattern = re.compile(
        r"https?://(?:www\.)?issuu\.com/([^/?#]+)/docs/([^/?#]+)(?:[/?#].*)?$"
    )

    mtc = issuu_url_pattern.match(url.strip())
    if mtc is None:
        raise ValueError("Issuu URL not Valid!")

    return mtc.group(1), mtc.group(2)
|
||||
|
||||
|
||||
def create_working_dir() -> Path:
    """Create a fresh temporary directory for downloaded files.

    Returns:
        Path of the newly created directory; its name is prefixed with
        ``issuu2epub_``. The caller is responsible for removing it.
    """
    return Path(tempfile.mkdtemp(prefix="issuu2epub_"))
|
||||
|
||||
|
||||
def get_page_urls(username: str, document_id: str) -> list[str]:
    """Fetch the document's reader JSON and list the image URL of every page.

    Args:
        username: Account name as it appears in the issuu URL.
        document_id: Document name as it appears in the issuu URL.

    Returns:
        One ``https://`` URL per entry of ``document.pages`` in the JSON,
        in the order the JSON lists them.

    Raises:
        HTTPError: If the JSON request does not return a success status.
    """
    response = request(
        "GET",
        f"https://reader3.isu.pub/{username}/{document_id}/reader3_4.json",
        timeout=(5, 5),
    )
    if not response.ok:
        raise HTTPError("Failed to download document information")

    page_urls = []
    # The JSON stores image locations without a scheme, so one is prepended.
    for entry in response.json()["document"]["pages"]:
        page_urls.append(f"https://{entry['imageUri']}")
    return page_urls
|
||||
|
||||
|
||||
def download_pages(page_urls: list[str], working_dir: Path) -> list[Path]:
    """Download every page image into ``working_dir``.

    Each file is named after the last ``/``-separated segment of its URL and
    is streamed to disk in 8 KiB chunks.

    Args:
        page_urls: Image URLs to fetch, in page order.
        working_dir: Existing directory that receives the files.

    Returns:
        The local file paths, in the same order as ``page_urls``.

    Raises:
        HTTPError: Propagated from ``raise_for_status()`` on a failed request.
    """
    downloaded = []

    for page_url in page_urls:
        target = working_dir / page_url.rsplit("/", 1)[-1]

        with request("GET", url=page_url, stream=True, timeout=(10, 10)) as resp:
            resp.raise_for_status()

            with target.open("wb") as out:
                for block in resp.iter_content(chunk_size=8192):
                    # Skip empty chunks yielded by the stream.
                    if not block:
                        continue
                    out.write(block)

        downloaded.append(target)

    return downloaded
|
||||
|
||||
|
||||
def generate_epub(
    pages: list[Path],
    output_file: Path,
    title: str,
    author: str,
    language: str = "de",
) -> None:
    """Build an EPUB with one chapter per page image and write it to disk.

    Every image is read from disk and embedded in the book under
    ``images/page_<n><suffix>``; each chapter is a small XHTML page that shows
    exactly one of those images. A confirmation line is printed when done.

    Args:
        pages: Local image files, in reading order.
        output_file: Destination path of the EPUB file.
        title: Book title stored in the metadata.
        author: Author stored in the metadata.
        language: Language code for the book and its chapters
            (defaults to ``"de"``).

    Raises:
        OSError: If a page image cannot be read.
    """
    import mimetypes  # local import: only this function needs it

    book = epub.EpubBook()
    book.set_identifier(secrets.token_urlsafe(10))
    book.set_title(title=title)
    book.set_language(language)
    book.add_author(author=author)

    chapters = []

    for i, page in enumerate(pages, start=1):
        page_title = f"Page {i}"

        image_item = epub.EpubImage()
        # Use an archive-relative, per-page-unique name instead of the
        # absolute temp-dir path of the downloaded file.
        image_item.file_name = f"images/page_{i}{page.suffix}"
        # Derive the media type from the file name; fall back to PNG when the
        # extension is unknown.
        image_item.media_type = mimetypes.guess_type(page.name)[0] or "image/png"
        # Embed the actual image bytes; without this the item is empty.
        image_item.content = page.read_bytes()
        book.add_item(image_item)

        chapter = epub.EpubHtml(
            title=page_title, file_name=f"page_{i}.xhtml", lang=language
        )

        chapter.content = f"""
        <html>
        <head>
            <title>{page_title}</title>
            <style>
                body {{
                    margin: 0;
                    padding: 20px;
                    text-align: center;
                }}
                img {{
                    max-width: 100%;
                    height: auto;
                    max-height: 90vh;
                }}
            </style>
        </head>
        <body>
            <img src="{image_item.file_name}" alt="Page {i}"/>
        </body>
        </html>
        """

        book.add_item(chapter)
        chapters.append(chapter)

    book.toc = chapters
    book.spine = ["nav"] + chapters

    book.add_item(epub.EpubNcx())
    book.add_item(epub.EpubNav())

    epub.write_epub(output_file, book, {})

    print(f"EPUB erfolgreich erstellt: {output_file}")
|
||||
|
||||
|
||||
def main() -> None:
    """Download one hard-coded issuu document and convert it to an EPUB.

    The page images are downloaded into a temporary working directory, which
    is removed again once the EPUB has been written or an error occurred.
    """
    import shutil  # local import: only needed for the temp-dir cleanup

    # Parse first so an invalid URL does not leave a temp directory behind.
    username, document_id = parse_issuu_url(
        "https://issuu.com/bscyb1898/docs/yb_mag_nr._1_saison_2025_26"
    )

    cwd = create_working_dir()
    try:
        urls = get_page_urls(username, document_id)

        pages = download_pages(urls, cwd)

        generate_epub(
            pages=pages,
            output_file=Path("/home/michael/Downloads/YBMag-2025-01.epub"),
            title="YB Mag 2025 01",
            author="BSC YB",
        )
    finally:
        # The downloaded images are no longer needed after this point.
        shutil.rmtree(cwd, ignore_errors=True)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
Reference in New Issue
Block a user