import os
from urllib.parse import urljoin, urlparse

import requests
from tqdm import tqdm

# BeautifulSoup (bs4) is required for HTML parsing. Fail at import time with
# an actionable message; raise ImportError (still an Exception subclass) and
# keep the original error attached as the explicit cause.
try:
    from bs4 import BeautifulSoup as bs
except ImportError as err:
    raise ImportError('Please install bs4 package') from err


def is_valid(url):
    """
    Checks whether `url` is a valid URL.

    A URL is considered valid when it can be parsed and has both a scheme
    (e.g. ``https``) and a network location (e.g. ``example.com``).

    Returns ``True`` or ``False``; malformed input that the parser rejects
    (such as an unbalanced IPv6 bracket) yields ``False`` instead of raising.
    """
    try:
        parsed = urlparse(url)
    except ValueError:
        # urlparse raises ValueError for some malformed URLs,
        # e.g. "http://[::1" (unterminated IPv6 literal)
        return False
    return bool(parsed.scheme and parsed.netloc)


def get_all_images(url, timeout=30):
    """
    Returns all image URLs on a single `url`

    Fetches the page, collects every ``<a>`` tag whose ``href`` contains the
    substring ``bmp`` and resolves each link to an absolute URL.

    Args:
        url: address of the HTML page to scan.
        timeout: seconds to wait on the server before giving up.

    Returns:
        A list of absolute URLs, in the order the links appear on the page.

    Raises:
        requests.RequestException: if the request fails, times out or the
            server answers with an error status.
    """
    response = requests.get(url, timeout=timeout)
    # do not try to scrape links out of an error page
    response.raise_for_status()
    soup = bs(response.content, "html.parser")

    # anchors without an href yield None and must be skipped
    hrefs = (anchor.attrs.get('href') for anchor in soup.find_all("a"))

    # resolve against the final URL (after redirects) with URL semantics;
    # os.path.join is OS-dependent and mishandles absolute links
    return [urljoin(response.url, href) for href in hrefs if href and 'bmp' in href]


def download_file(url, pathname, timeout=30):
    """
    Downloads a file given an URL and puts it in the folder `pathname`

    The file is named after the last segment of the URL path (query string
    and fragment are ignored) and is streamed to disk in chunks while a
    byte-based progress bar is shown.

    Args:
        url: address of the file to download.
        pathname: target folder; created if it does not exist.
        timeout: seconds to wait on the server before giving up.

    Raises:
        ValueError: if no file name can be derived from `url`.
        requests.RequestException: if the request fails, times out or the
            server answers with an error status.
        OSError: if the folder or file cannot be created or written.
    """
    # take the name from the parsed path so "?query" / "#fragment" are excluded
    basename = urlparse(url).path.rsplit("/", 1)[-1]
    if not basename:
        raise ValueError(f"Cannot derive a file name from URL: {url!r}")
    filename = os.path.join(pathname, basename)

    # exist_ok avoids the check-then-create race of isdir() + makedirs()
    os.makedirs(pathname, exist_ok=True)

    # download the body of response by chunk, not immediately; the context
    # manager releases the connection even if writing fails
    with requests.get(url, stream=True, timeout=timeout) as response:
        # never save an HTTP error page under the image's name
        response.raise_for_status()
        # total size if the server reports it; None lets tqdm show a bare counter
        file_size = int(response.headers.get("Content-Length", 0)) or None
        # the bar is driven only by update(); also wrapping the iterator
        # would add one extra unit per chunk
        with open(filename, "wb") as f, tqdm(
            desc=f"Downloading {filename}",
            total=file_size,
            unit="B",
            unit_scale=True,
            unit_divisor=1024,
        ) as progress:
            for data in response.iter_content(1024):
                f.write(data)
                progress.update(len(data))


def download_stack(url, path):
    """
    Downloads every image found on the page `url` into the folder `path`
    """
    # look the image URLs up once, then fetch them one after another
    for image_url in get_all_images(url):
        download_file(image_url, path)


if __name__ == '__main__':
    # script entry point: fetch the BMP image set into ./data under the
    # current working directory
    target_dir = os.path.join(os.getcwd(), 'data')
    download_stack('https://www.math.purdue.edu/~lucier/PHOTO_CD/BMP_IMAGES/', target_dir)