Link Extractor

Anchor

Extract every anchor link from a page, deduplicate the results, and classify each link as internal or external.

Resolves relative URLs to absolute, deduplicates, and skips javascript:, mailto: and fragment links.

Equivalent Python

from scrapper_tools import fetch
from urllib.parse import urlparse, urljoin

# Collect every navigable anchor link on the page as an absolute URL,
# deduplicated and flagged as internal (same host as the start URL) or external.
START_URL = "https://quotes.toscrape.com/"
# Schemes that never lead to a fetchable page.
SKIPPED_SCHEMES = ("javascript:", "mailto:")

page = fetch(START_URL)
base = urlparse(START_URL)
links = []
seen = set()  # absolute URLs already recorded, for O(1) duplicate checks
for a in page.css("a"):
    raw = a.attrib.get("href", "").strip()
    # Skip anchors without a target, same-page fragment links ("#top"),
    # and javascript:/mailto: pseudo-links (scheme match is case-insensitive).
    if not raw or raw.startswith("#") or raw.lower().startswith(SKIPPED_SCHEMES):
        continue
    # Resolve relative hrefs ("/page/2/") against the page URL.
    href = urljoin(START_URL, raw)
    if href in seen:
        continue
    seen.add(href)
    internal = urlparse(href).netloc == base.netloc
    links.append({"url": href, "internal": internal})
print(f"Found {len(links)} links")