# issuu2epub/issuu2epub.py

import html
import io
import os
import re
import secrets
import shutil
import tempfile
from fileinput import filename
from pathlib import Path

import click
from ebooklib import epub  # type: ignore
from PIL import Image
from requests import HTTPError, auth, request


# Compiled once at import time. The dots are escaped so only the real host
# matches, and each captured segment stops at "/", "?" or "#" so a trailing
# slash, page suffix, query string or fragment never ends up in the id.
_ISSUU_URL_PATTERN = re.compile(
    r"https?://(?:www\.)?issuu\.com/([^/?#]+)/docs/([^/?#]+)(?:[/?#].*)?$"
)


def parse_issuu_url(url: str) -> tuple[str, str]:
    """Get username and document_id from an issuu url.

    Accepts URLs of the form ``https://issuu.com/<username>/docs/<document_id>``.
    Anything after the document id (trailing slash, page number, query string,
    fragment) is ignored; surrounding whitespace is stripped.

    returns:
        username: str
        document_id: str

    raises:
        ValueError: if the URL does not look like an issuu document URL.
    """
    mtc = _ISSUU_URL_PATTERN.match(url.strip())
    if mtc is None:
        raise ValueError("Issuu URL not Valid!")

    return mtc.group(1), mtc.group(2)


def create_working_dir() -> Path:
    """Create a fresh temporary directory for this run.

    The directory name starts with ``issuu2epub_``. Nothing here removes the
    directory again; that is left to the caller.

    returns:
        Path() pointing at the newly created directory.
    """
    return Path(tempfile.mkdtemp(prefix="issuu2epub_"))


def get_page_urls(username: str, document_id: str) -> list[str]:
    """Fetch the document's reader JSON and list its page image URLs.

    Requests ``reader3_4.json`` for the given user/document from
    ``reader3.isu.pub`` and builds one ``https://`` URL per entry in
    ``document.pages`` from that entry's ``imageUri`` field.

    returns:
        Page image URLs, in the order they appear in the JSON.

    raises:
        HTTPError: if the JSON request does not come back with an OK status.
    """
    response = request(
        "GET",
        f"https://reader3.isu.pub/{username}/{document_id}/reader3_4.json",
        timeout=(5, 5),
    )
    if not response.ok:
        raise HTTPError("Failed to download document information")

    page_entries = response.json()["document"]["pages"]

    image_urls = []
    for entry in page_entries:
        image_urls.append(f"https://{entry['imageUri']}")
    return image_urls


def download_pages(page_urls: list[str], working_dir: Path) -> list[Path]:
    """Download every page image into ``working_dir``.

    Each file is named after the last path segment of its URL and is streamed
    to disk in 8 KiB chunks.

    returns:
        The local file paths, in the same order as ``page_urls``.

    raises:
        requests.HTTPError: via ``raise_for_status()`` when a download
            responds with an error status.
    """
    saved_paths = []

    for page_url in page_urls:
        target = working_dir / page_url.rsplit("/", 1)[-1]
        # Recorded before the transfer starts, matching the order of page_urls.
        saved_paths.append(target)

        with request(
            "GET", url=page_url, stream=True, timeout=(10, 10)
        ) as response:
            response.raise_for_status()

            with target.open("wb") as out_file:
                for block in response.iter_content(chunk_size=8192):
                    if not block:
                        # Skip empty chunks.
                        continue
                    out_file.write(block)

    return saved_paths


def convert_image(
    image_path: Path,
    *,
    max_image_size: tuple[int, int] = (1000, 1400),
    target_quality: int = 50,
) -> bytes:
    """Shrink an image and re-encode it as JPEG.

    args:
        image_path: image file to read.
        max_image_size: (width, height) bounding box; the image is scaled
            down to fit while keeping its aspect ratio.
        target_quality: JPEG quality setting passed to the encoder.

    returns:
        The encoded JPEG data as ``bytes``.
    """
    with Image.open(image_path) as img:
        # Only keep modes that are known to save cleanly as JPEG; everything
        # else (RGBA, P, LA, PA, I, F, CMYK, ...) is normalised to RGB so the
        # save below cannot fail on an unsupported mode.
        if img.mode not in ("RGB", "L"):
            img = img.convert("RGB")

        img.thumbnail(max_image_size, Image.Resampling.LANCZOS)

        buffer = io.BytesIO()
        img.save(buffer, format="JPEG", optimize=True, quality=target_quality)

    return buffer.getvalue()


def _image_page_html(title: str, image_src: str, alt: str) -> str:
    """Return the markup for a page that shows one centred image."""
    # Escape everything interpolated into markup so a title such as
    # "Tom & Jerry <1>" cannot break the document.
    return f"""
    <html>
        <head>
            <title>{html.escape(title)}</title>
            <style>
                body {{
                    margin: 0;
                    padding: 20px;
                    text-align: center;
                }}
                img {{
                    max-width: 100%;
                    height: auto;
                    max-height: 90vh;
                }}
            </style>
        </head>
        <body>
            <img src="{html.escape(image_src)}" alt="{html.escape(alt)}"/>
        </body>
    </html>
    """


def _jpeg_item(file_name: str, image_path: Path) -> "epub.EpubImage":
    """Build an EPUB image item holding the converted JPEG of ``image_path``."""
    item = epub.EpubImage()
    item.file_name = file_name
    item.media_type = "image/jpeg"
    item.content = convert_image(image_path)
    return item


def generate_epub(
    pages: list[Path],
    output_file: Path,
    title: str,
    author: str,
    language: str = "de",
) -> None:
    """Generate an EPUB file with one image page per entry in ``pages``.

    The first page becomes the title page (``title_page.xhtml``); every
    further page becomes its own chapter named ``Page <n>``.

    args:
        pages: paths of the page images, in reading order.
        output_file: where the EPUB is written.
        title: book title.
        author: book author.
        language: language code set on the book and on every page.

    raises:
        ValueError: if ``pages`` is empty.
    """
    # Fail early with a clear message instead of an IndexError on pages[0].
    if not pages:
        raise ValueError("Cannot generate an EPUB without any pages")

    book = epub.EpubBook()
    book.set_identifier(secrets.token_urlsafe(10))
    book.set_title(title=title)
    book.set_language(language)
    book.add_author(author=author)

    # Use first image as cover.
    cover_image = _jpeg_item("images/cover.jpg", pages[0])
    book.add_item(cover_image)

    title_page = epub.EpubHtml(
        title=title, file_name="title_page.xhtml", lang=language
    )
    title_page.content = _image_page_html(title, cover_image.file_name, "Cover")
    book.add_item(title_page)

    chapters = [title_page]

    # Add the remaining pages, numbered from 1.
    for i, page in enumerate(pages[1:], start=1):
        page_title = f"Page {i}"

        image_item = _jpeg_item(f"images/page_{i:03d}.jpg", page)
        book.add_item(image_item)

        chapter = epub.EpubHtml(
            title=page_title, file_name=f"page_{i}.xhtml", lang=language
        )
        chapter.content = _image_page_html(
            page_title, image_item.file_name, page_title
        )
        book.add_item(chapter)
        chapters.append(chapter)

    # The spine refers to "nav", so the navigation documents must actually be
    # part of the book; otherwise the spine/toc references point at nothing.
    book.toc = tuple(chapters)
    book.add_item(epub.EpubNcx())
    book.add_item(epub.EpubNav())

    book.spine = ["nav", *chapters]

    epub.write_epub(output_file, book, {})

    print(f"✅ Kindle-optimiertes EPUB erstellt: {output_file}")


@click.command()
@click.option("--url", prompt="Issuu URL", help="Issuu URL to convert to EPUB")
@click.option("--title", prompt="Document Title", help="Document Title")
@click.option("--author", prompt="Document Author", help="Document Author")
@click.option(
    "--output",
    prompt="EPUB Output Filename",
    help="EPUB Output File",
)
def main(url: str, title: str, author: str, output: str) -> None:
    """Download an Issuu document and convert it to an EPUB file.

    Parses the URL, fetches the list of page images, downloads them into a
    temporary working directory and writes the EPUB to ``output``. The
    working directory is removed afterwards, whether or not the conversion
    succeeded.
    """
    # Validate the URL before touching the file system, and report a bad URL
    # as a normal click usage error rather than a traceback.
    try:
        username, document_id = parse_issuu_url(url=url)
    except ValueError as err:
        raise click.BadParameter(str(err), param_hint="--url") from err

    working_dir = create_working_dir()
    try:
        urls = get_page_urls(username, document_id)
        pages = download_pages(urls, working_dir)

        generate_epub(
            pages=pages,
            output_file=Path(output),
            title=title,
            author=author,
        )
    finally:
        # The downloaded page images are only needed while building the EPUB.
        shutil.rmtree(working_dir, ignore_errors=True)


# Run the click command only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()