diff --git a/Web-Scraping/Configurable-Scraper/config.json b/Web-Scraping/Configurable-Scraper/config.json
new file mode 100644
--- /dev/null
+++ b/Web-Scraping/Configurable-Scraper/config.json
@@ -0,0 +1,5 @@
+{
+    "url": "https://quotes.toscrape.com",
+    "tag": "span",
+    "class": "text"
+}
diff --git a/Web-Scraping/Configurable-Scraper/readme.md b/Web-Scraping/Configurable-Scraper/readme.md
new file mode 100644
--- /dev/null
+++ b/Web-Scraping/Configurable-Scraper/readme.md
@@ -0,0 +1,14 @@
+# Configurable Web Scraper
+
+This script allows scraping any website using a JSON configuration file.
+
+## Features
+- Config-driven scraping
+- Reusable for multiple websites
+- Simple and beginner-friendly
+
+## How to Use
+
+1. Edit config.json
+2. Run:
+   python scraper.py
diff --git a/Web-Scraping/Configurable-Scraper/requirements.txt b/Web-Scraping/Configurable-Scraper/requirements.txt
new file mode 100644
--- /dev/null
+++ b/Web-Scraping/Configurable-Scraper/requirements.txt
@@ -0,0 +1,2 @@
+requests
+beautifulsoup4
diff --git a/Web-Scraping/Configurable-Scraper/scraper.py b/Web-Scraping/Configurable-Scraper/scraper.py
new file mode 100644
--- /dev/null
+++ b/Web-Scraping/Configurable-Scraper/scraper.py
@@ -0,0 +1,32 @@
+import json
+
+import requests
+from bs4 import BeautifulSoup
+
+
+def main():
+    """Scrape elements described by config.json and print them, numbered."""
+    # Config-driven: the URL, tag name, and CSS class all come from
+    # config.json, so the same script can scrape any site without edits.
+    with open("config.json", encoding="utf-8") as f:
+        config = json.load(f)
+
+    url = config["url"]
+    tag = config["tag"]
+    class_name = config["class"]
+
+    # A timeout stops the script hanging on an unresponsive host, and
+    # raise_for_status() fails loudly instead of parsing an error page.
+    response = requests.get(url, timeout=10)
+    response.raise_for_status()
+
+    soup = BeautifulSoup(response.text, "html.parser")
+    elements = soup.find_all(tag, class_=class_name)
+
+    print(f"\nScraping from: {url}\n")
+    for i, el in enumerate(elements, 1):
+        print(f"{i}. {el.text.strip()}")
+
+
+if __name__ == "__main__":
+    main()