Work in progress.
This commit is contained in:
77
main.py
77
main.py
@@ -1,6 +1,81 @@
|
||||
import re
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
|
||||
from requests import HTTPError, request
|
||||
|
||||
|
||||
# Compiled once at import time. The dots in the host name are escaped so only
# the literal "issuu.com" matches, and both captured path segments must be
# non-empty and may not contain "/", "?" or "#". An optional trailing slash
# and an optional query string / fragment are tolerated but not captured.
_ISSUU_URL_PATTERN = re.compile(
    r"https://issuu\.com/([^/?#]+)/docs/([^/?#]+)/?(?:[?#].*)?"
)


def parse_issuu_url(url: str) -> tuple[str, str]:
    """Extract the username and document id from an Issuu document URL.

    The URL must have the form ``https://issuu.com/<username>/docs/<document_id>``.
    Surrounding whitespace, a trailing slash, and a query string or fragment
    are ignored so that they never end up inside the returned document id.

    Args:
        url: The Issuu document URL to parse.

    Returns:
        A ``(username, document_id)`` tuple.

    Raises:
        ValueError: If *url* is not a valid Issuu document URL.
    """
    # fullmatch (instead of match + "$") also rejects a stray trailing newline.
    mtc = _ISSUU_URL_PATTERN.fullmatch(url.strip())
    if mtc is None:
        raise ValueError("Issuu URL not Valid!")

    return mtc.group(1), mtc.group(2)
|
||||
|
||||
|
||||
def create_working_dir() -> Path:
    """Create a fresh temporary directory to work in.

    The directory is made with ``tempfile.mkdtemp`` and its name starts with
    ``issuu2epub_``. This function does not remove it again.

    Returns:
        The path of the newly created directory.
    """
    return Path(tempfile.mkdtemp(prefix="issuu2epub_"))
|
||||
|
||||
|
||||
def get_page_urls(username: str, document_id: str) -> list[str]:
    """Fetch the reader JSON for a document and list its page image URLs.

    Args:
        username: First path component of the reader JSON URL.
        document_id: Second path component of the reader JSON URL.

    Returns:
        One ``https://`` URL per entry in ``document.pages`` of the JSON
        response, built from each entry's ``imageUri`` value, in order.

    Raises:
        HTTPError: If the response status is not OK.
    """
    response = request(
        "GET",
        f"https://reader3.isu.pub/{username}/{document_id}/reader3_4.json",
        timeout=(5, 5),
    )
    if not response.ok:
        raise HTTPError("Failed to download document information")

    pages = response.json()["document"]["pages"]

    page_urls = []
    for entry in pages:
        # The JSON carries the image location without a scheme.
        page_urls.append(f"https://{entry['imageUri']}")
    return page_urls
|
||||
|
||||
|
||||
def download_pages(page_urls: list[str], working_dir: Path) -> list[Path]:
    """Download every page image into *working_dir*.

    Each file is named after the last ``/``-separated component of its URL
    and is streamed to disk in 8192-byte chunks.

    Args:
        page_urls: URLs of the page images to download.
        working_dir: Directory the files are written to.

    Returns:
        The paths of the written files, in the same order as *page_urls*.
    """
    downloaded: list[Path] = []

    for page_url in page_urls:
        target = Path(working_dir / page_url.rsplit("/", 1)[-1])
        downloaded.append(target)

        with request("GET", url=page_url, stream=True, timeout=(10, 10)) as response:
            # Check the status before the target file is created.
            response.raise_for_status()

            with target.open("wb") as out_file:
                # filter(None, ...) skips empty chunks.
                for data in filter(None, response.iter_content(chunk_size=8192)):
                    out_file.write(data)

    return downloaded
|
||||
|
||||
|
||||
def main(
    url: str = "https://issuu.com/bscyb1898/docs/yb_mag_nr._1_saison_2025_26",
) -> None:
    """Download all page images of an Issuu document and print their paths.

    Creates a temporary working directory, resolves the page image URLs for
    the document behind *url*, downloads them into that directory and prints
    the list of resulting file paths.

    Args:
        url: Issuu document URL to download. Defaults to the sample document
            that was previously hard-coded here.
    """
    working_dir = create_working_dir()
    username, document_id = parse_issuu_url(url)

    page_urls = get_page_urls(username, document_id)

    print(download_pages(page_urls, working_dir))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
Reference in New Issue
Block a user