A Simple Tool for Downloading Files from Hugging Face

Published: (January 19, 2026 at 09:43 PM EST)
6 min read
Source: Dev.to

Source: Dev.to

hf_get_from_url.py – Download Files from Hugging Face Repositories

Developers who work with machine learning models often need to download files from Hugging Face repositories. While the Hugging Face website provides links, manually handling URLs and paths can be inconvenient. The script hf_get_from_url.py simplifies this process by interpreting a variety of input formats and calling the huggingface_hub Python API to fetch the requested file(s).

What the script can handle

| Input style | Example |
| --- | --- |
| Full Hugging Face URL with blob or resolve | https://huggingface.co/owner/repo/blob/main/file.gguf |
| Shortened form (no scheme) | huggingface.co/owner/repo/blob/main/file.gguf |
| Repo‑style path (owner/repo/file) | owner/repo/file.gguf |
| Direct repository reference (with or without file path) | owner/repo or owner/repo/path/to/dir |

After parsing the input, the script uses the huggingface_hub library to download the specified file (or directory) into a local directory.

Key Features

  • Flexible Input Parsing – Accepts full URLs, shortened URLs, and plain owner/repo/path strings.
  • Dry‑Run Mode – `--dry-run` prints the download action that would be taken without performing any download.
  • Custom Local Directory – `--flatten-localdir` stores files in a folder whose name uses hyphens (owner-repo) instead of slashes for convenience.
  • Error Handling – Checks that the huggingface_hub package is installed and provides clear error messages when something goes wrong.

Example Usage

# Download using a full URL
python hf_get_from_url.py "https://huggingface.co/owner/repo/blob/main/file.gguf"

# Download using a repo‑style path
python hf_get_from_url.py owner/repo/file.gguf

# Preview the command without downloading
python hf_get_from_url.py --dry-run "huggingface.co/owner/repo/blob/main/file.gguf"

These examples illustrate how the tool streamlines the workflow for anyone who frequently downloads models, datasets, or configuration files from Hugging Face.

Source Code

#!/usr/bin/env python3
"""
hf_get_from_url.py
Download files or directories from Hugging Face Hub using huggingface_hub API.

Usage:
  python hf_get_from_url.py [--dry-run] [--flatten-localdir] <input> [<input> ...]

Examples:
  python hf_get_from_url.py "https://huggingface.co/owner/repo/blob/main/path/to/file.gguf"
  python hf_get_from_url.py owner/repo/path/to/file.gguf
  python hf_get_from_url.py --dry-run "huggingface.co/owner/repo/blob/main/models"

Notes:
  - Requires: pip install huggingface_hub
  - Authentication (if needed) is taken from env HF_TOKEN or huggingface_hub.login()
"""

from __future__ import annotations

import argparse
import sys
import re
from urllib.parse import urlparse, unquote
from typing import Optional, Tuple, List

# huggingface_hub API
try:
    from huggingface_hub import hf_hub_download, snapshot_download
except Exception:
    hf_hub_download = None
    snapshot_download = None

# ----------------------------------------------------------------------
# Regex patterns
# ----------------------------------------------------------------------
# All three patterns expose the named groups that parse_input() reads:
#   repo - the "owner/name" pair (exactly two slash-free segments)
#   rev  - the single segment following "blob/" or "resolve/"
#   path - everything after the revision (or after the repo for RE_SIMPLE)

# Full or scheme-less huggingface.co URL containing /blob/ or /resolve/.
RE_BLOB_RESOLVE = re.compile(
    r'^(?:https?://)?(?:www\.)?huggingface\.co/'
    r'(?P<repo>[^/]+/[^/]+)/(?:blob|resolve)/'
    r'(?P<rev>[^/]+)/(?P<path>.+)$'
)

# Same layout with the host already removed: owner/name/(blob|resolve)/rev/path.
RE_NO_PREFIX = re.compile(
    r'^(?P<repo>[^/]+/[^/]+)/(?:blob|resolve)/'
    r'(?P<rev>[^/]+)/(?P<path>.+)$'
)

# Plain owner/name with an optional trailing path; `path` is None when absent.
RE_SIMPLE = re.compile(
    r'^(?P<repo>[^/]+/[^/]+)(?:/(?P<path>.+))?$'
)

# ----------------------------------------------------------------------
# Input parser
# ----------------------------------------------------------------------
def parse_input(s: str) -> Optional[Tuple[str, Optional[str], str]]:
    """
    Parse a Hugging Face reference and return (repo, revision, path).

    Accepted forms are full blob/resolve URLs, scheme-less
    ``huggingface.co/...`` (optionally ``www.``) references, other URLs
    whose host contains "huggingface", and bare ``owner/repo/path`` strings.

    Args:
        s: Raw user input. Surrounding whitespace, any query string or
            fragment, and trailing slashes are discarded; percent-escapes
            are decoded before matching.

    Returns:
        ``(repo, revision, path)`` where ``revision`` is None when the input
        names no revision (meaning default branch), or None when the input
        cannot be parsed or contains no path after the repository.
    """
    s = s.strip()
    # Cut the query/fragment first so an encoded '?' or '#' inside the path
    # is not mistaken for a delimiter after decoding.
    s = unquote(s.split('?', 1)[0].split('#', 1)[0]).rstrip('/')

    # 1) Explicit Hugging Face URL (blob/resolve)
    m = RE_BLOB_RESOLVE.match(s)
    if m:
        return m.group('repo'), m.group('rev'), m.group('path')

    # 2) huggingface.co/... without scheme (with or without "www.")
    for host_prefix in ('www.huggingface.co/', 'huggingface.co/'):
        if not s.startswith(host_prefix):
            continue
        candidate = s[len(host_prefix):].lstrip('/')
        m2 = RE_NO_PREFIX.match(candidate)
        if m2:
            return m2.group('repo'), m2.group('rev'), m2.group('path')

        m2 = RE_SIMPLE.match(candidate)
        if m2 and m2.group('path'):
            return m2.group('repo'), None, m2.group('path')

        # The input names the host but no file path. Stop here: falling
        # through to step 4 would treat the hostname as the repo owner
        # (e.g. repo="huggingface.co/owner", path="repo").
        return None

    # 3) Generic URL parse
    try:
        p = urlparse(s)
    except ValueError:
        # urlparse rejects malformed netlocs (e.g. unbalanced IPv6 brackets)
        p = None

    if p and p.netloc and 'huggingface' in p.netloc:
        parts = p.path.lstrip('/').split('/')
        if len(parts) >= 5 and parts[2] in ('blob', 'resolve'):
            repo = f"{parts[0]}/{parts[1]}"
            rev = parts[3]
            path = '/'.join(parts[4:])
            return repo, rev, path
        elif len(parts) >= 3:
            repo = f"{parts[0]}/{parts[1]}"
            path = '/'.join(parts[2:])
            return repo, None, path

    # 4) Direct repo/path
    m3 = RE_NO_PREFIX.match(s)
    if m3:
        return m3.group('repo'), m3.group('rev'), m3.group('path')

    m4 = RE_SIMPLE.match(s)
    if m4 and m4.group('path'):
        return m4.group('repo'), None, m4.group('path')

    return None

# ----------------------------------------------------------------------
# Download logic
# ----------------------------------------------------------------------
def run_hf_download_api(
    repo: str,
    path: str,
    rev: Optional[str],
    local_dir: Optional[str],
    dry_run: bool,
) -> int:
    """
    Download ``path`` from ``repo``, trying a single file before a snapshot.

    The single-file download is attempted first; if it raises, the function
    falls back to ``snapshot_download`` restricted to ``path`` itself (exact
    file or glob) and to everything beneath ``path`` as a directory.

    Args:
        repo: Repository id in ``owner/name`` form.
        path: File path, directory path, or glob pattern inside the repo.
        rev: Revision forwarded to huggingface_hub; None is forwarded
            unchanged and shown as "default" in the log line.
        local_dir: Destination directory; ``repo`` is used when None.
        dry_run: When true, print the planned action and download nothing.

    Returns:
        0 on success or dry run, 1 when both download attempts fail,
        2 when huggingface_hub is not available.
    """
    if hf_hub_download is None or snapshot_download is None:
        print(
            "Error: huggingface_hub is not installed. "
            "Please run `pip install huggingface_hub`.",
            file=sys.stderr,
        )
        return 2

    if local_dir is None:
        local_dir = repo

    rev_disp = rev if rev else "default"
    print(f"> (api) download {repo}@{rev_disp} {path} -> local_dir={local_dir}")

    if dry_run:
        return 0

    # ---- Try as single file ----
    try:
        local_path = hf_hub_download(
            repo_id=repo,
            filename=path,
            revision=rev,
            local_dir=local_dir,
        )
        print(f"✔ Downloaded file to {local_path}")
        return 0
    except Exception as e_file:
        # Best-effort: any failure here (missing entry, directory path, glob)
        # is reported and then retried as a snapshot below.
        print(f"⚠ Single‑file download failed: {e_file}", file=sys.stderr)

    # ---- Fallback: download sub‑directory or glob pattern ----
    # A bare f"{path}*" is a prefix match, so "models" would also fetch
    # "models_old/..." or "models.txt". Match the path itself (covers an
    # exact name or a caller-supplied glob) plus the directory's contents.
    allow_patterns = [path, f"{path.rstrip('/')}/*"]
    try:
        snapshot_download(
            repo_id=repo,
            revision=rev,
            local_dir=local_dir,
            allow_patterns=allow_patterns,
        )
        print(f"✔ Snapshot downloaded to {local_dir}")
        return 0
    except Exception as e_snap:
        print(f"❌ Snapshot download failed: {e_snap}", file=sys.stderr)
        return 1

# ----------------------------------------------------------------------
# CLI entry point
# ----------------------------------------------------------------------
def main(argv: List[str] | None = None) -> int:
    """
    Command-line entry point: parse each input and download it in turn.

    Args:
        argv: Argument list to parse; None lets argparse read ``sys.argv``.

    Returns:
        0 when every input succeeded. Otherwise 1 for an unparseable input or
        the non-zero code of a failed download; the most recent failure wins.
    """
    cli = argparse.ArgumentParser(
        description="Download files from Hugging Face Hub using flexible input formats."
    )
    cli.add_argument(
        "inputs",
        nargs="+",
        help="URL, repo/path, or shortened Hugging Face reference.",
    )
    cli.add_argument(
        "--dry-run",
        action="store_true",
        help="Show the commands that would be run without downloading.",
    )
    cli.add_argument(
        "--flatten-localdir",
        action="store_true",
        help="Replace '/' with '-' in the local directory name.",
    )
    opts = cli.parse_args(argv)

    status = 0
    for raw in opts.inputs:
        result = parse_input(raw)
        if not result:
            print(f"❌ Could not parse input: {raw}", file=sys.stderr)
            status = 1
            continue

        repo_id, revision, file_path = result

        # None defers the choice of directory to run_hf_download_api.
        if args_flatten := opts.flatten_localdir:
            target_dir = repo_id.replace("/", "-")
        else:
            target_dir = None

        code = run_hf_download_api(
            repo=repo_id,
            path=file_path,
            rev=revision,
            local_dir=target_dir,
            dry_run=opts.dry_run,
        )
        if code != 0:
            status = code

    return status


if __name__ == "__main__":
    raise SystemExit(main())

This tool simplifies the workflow for anyone who frequently downloads models, datasets, or configuration files from Hugging Face. By accepting different input styles and providing clear feedback, it makes the process faster and less error‑prone.

filename = path,
revision = rev,
local_dir = local_dir,
)
print(f"Downloaded file: {local_path}")
return 0
except Exception as e:
    print(
        f"hf_hub_download failed: {e}. "
        "Trying snapshot_download for directory/pattern...",
        file=sys.stderr,
    )

# ---- Fallback: directory or glob ----
allow_pattern = path.rstrip("/") + "/*"

try:
    repo_local_dir = snapshot_download(
        repo_id=repo,
        revision=rev,
        local_dir=local_dir,
        allow_patterns=[allow_pattern],
    )
    print(f"Snapshot downloaded into: {repo_local_dir}")
    return 0
except Exception as e:
    print(f"snapshot_download failed: {e}", file=sys.stderr)
    return 3

# ----------------------------------------------------------------------
# Main
# ----------------------------------------------------------------------
def main(argv: List[str]) -> int:
    """
    Parse command-line arguments and download every requested input.

    Args:
        argv: Argument list, without the program name.

    Returns:
        1 if any input failed to parse, yielded no path, or failed to
        download; 0 otherwise.
    """
    cli = argparse.ArgumentParser(
        description="Download files or directories from Hugging Face Hub"
    )
    cli.add_argument(
        "inputs",
        nargs="+",
        help="Hugging Face URL or /path",
    )
    cli.add_argument(
        "--dry-run",
        action="store_true",
        help="Print actions without executing",
    )
    # Accepted but never read below.
    cli.add_argument(
        "--hf-cmd",
        default="hf",
        help="(ignored, kept for compatibility)",
    )
    cli.add_argument(
        "--flatten-localdir",
        action="store_true",
        help="Replace '/' with '-' in local directory name",
    )
    opts = cli.parse_args(argv)

    failures = 0

    for raw in opts.inputs:
        result = parse_input(raw)
        if not result:
            print(f"Failed to parse input: {raw}", file=sys.stderr)
            failures += 1
            continue

        repo_id, revision, file_path = result
        if not file_path:
            print(f"No file path extracted for input: {raw}", file=sys.stderr)
            failures += 1
            continue

        if opts.flatten_localdir:
            target_dir = repo_id.replace("/", "-")
        else:
            target_dir = repo_id

        code = run_hf_download_api(
            repo=repo_id,
            path=file_path,
            rev=revision,
            local_dir=target_dir,
            dry_run=opts.dry_run,
        )
        if code != 0:
            failures += 1

    return 1 if failures else 0


if __name__ == "__main__":
    raise SystemExit(main(sys.argv[1:]))
Back to Blog

Related posts

Read more »

Rapg: TUI-based Secret Manager

We've all been there. You join a new project, and the first thing you hear is: > 'Check the pinned message in Slack for the .env file.' Or you have several .env...

Technology is an Enabler, not a Saviour

Why clarity of thinking matters more than the tools you use Technology is often treated as a magic switch—flip it on, and everything improves. New software, pl...