This commit is contained in:
Balázs Toldi 2025-07-16 15:00:48 +02:00
parent 6e0902bcbc
commit 27fcbc379b
5 changed files with 26 additions and 13 deletions

View file

@ -1,6 +0,0 @@
def main():
    """Print the project's placeholder greeting to standard output."""
    print("Hello from hardverapro-mcp!")


# Run the greeting only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

View file

@ -9,3 +9,8 @@ dependencies = [
"gradio[mcp]>=5.34.2", "gradio[mcp]>=5.34.2",
"requests>=2.32.4", "requests>=2.32.4",
] ]
[tool.setuptools.package-dir]
"scraper" = "src/scraper"
[project.scripts]
run-scraper = "run:main"

16
run.py
View file

@ -1,4 +1,10 @@
import scraper import sys
from pathlib import Path
# Add the src directory to Python path
sys.path.append(str(Path(__file__).parent / "src"))
from scraper import categories, search, fetch
import gradio as gr import gradio as gr
def hardverapro_search(query: str,offset: int = 0, category: str = "All")-> list: def hardverapro_search(query: str,offset: int = 0, category: str = "All")-> list:
@ -16,7 +22,7 @@ def hardverapro_search(query: str,offset: int = 0, category: str = "All")-> list
- 'price' (str): The price listed - 'price' (str): The price listed
- 'link' (str): A full URL to the listing - 'link' (str): A full URL to the listing
""" """
raw = scraper.search(query,offset=offset,category=category) raw = search(query,offset=offset,category=category)
return [[r["title"], r["price"], r["link"]] for r in raw] return [[r["title"], r["price"], r["link"]] for r in raw]
def hardverapro_fetch(url: str) -> dict: def hardverapro_fetch(url: str) -> dict:
@ -32,7 +38,7 @@ def hardverapro_fetch(url: str) -> dict:
- 'description' (str): The description of the listing - 'description' (str): The description of the listing
- 'img' (str): A full URL to the image - 'img' (str): A full URL to the image
""" """
return scraper.fetch(url) return fetch(url)
@ -41,7 +47,7 @@ iface_search = gr.Interface(
inputs=[ inputs=[
gr.Textbox(label="Search query"), gr.Textbox(label="Search query"),
gr.Number(label="Offset (e.g. 100, 200)", value=0), gr.Number(label="Offset (e.g. 100, 200)", value=0),
gr.Dropdown(choices=scraper.categories, value="All"), gr.Dropdown(choices=categories, value="All"),
], ],
outputs=gr.Dataframe(headers=["title", "price", "link"], type="array"), outputs=gr.Dataframe(headers=["title", "price", "link"], type="array"),
title="HardverApró Search" title="HardverApró Search"
@ -61,8 +67,6 @@ tabbed_interface = gr.TabbedInterface(
def main(): def main():
#scraper.fetch("https://hardverapro.hu/apro/sff_2_5_10k_15k_sas_hdd-k_1_2tb-ig/friss.html")
#iface.launch(mcp_server=True)
tabbed_interface.launch(mcp_server=True) tabbed_interface.launch(mcp_server=True)
if __name__ == "__main__": if __name__ == "__main__":

4
src/scraper/__init__.py Normal file
View file

@ -0,0 +1,4 @@
"""Package initializer for the ``scraper`` package.

Re-exports ``categories``, ``search`` and ``fetch`` from the sibling
``scraper`` module so callers can import them from the package directly.
"""
# Version string of this package.
__version__ = "1.0.0"
# Lift the public names from the .scraper submodule to package level.
from .scraper import categories, search, fetch
# Names exported by a star-import of this package.
__all__ = ["fetch", "search", "categories"]

View file

@ -7,6 +7,7 @@ SEARCH_URL="keres.php?stext="
categoriesMap = { categoriesMap = {
"All": "", "All": "",
"Hardver": "hardver",
"Alaplap": "hardver/alaplap/", "Alaplap": "hardver/alaplap/",
"Processzor": "hardver/processzor/", "Processzor": "hardver/processzor/",
"Memória": "hardver/memoria/", "Memória": "hardver/memoria/",
@ -18,6 +19,7 @@ categoriesMap = {
"Szerver SSD, HDD": "hardver/merevlemez_ssd/szerver_hdd_ssd/", "Szerver SSD, HDD": "hardver/merevlemez_ssd/szerver_hdd_ssd/",
"Adathordozó": "hardver/adathordozo/", "Adathordozó": "hardver/adathordozo/",
"Hálózati termékek": "hardver/halozati_termekek/", "Hálózati termékek": "hardver/halozati_termekek/",
"Switch, HUB": "hardver/halozati_termekek/router_switch_repeater/switch_hub/",
"3D nyomtatás": "hardver/3d_nyomtatas/", "3D nyomtatás": "hardver/3d_nyomtatas/",
"Nyomtató, szkenner": "hardver/nyomtato_szkenner/", "Nyomtató, szkenner": "hardver/nyomtato_szkenner/",
"Játékvezérlő, szimulátor": "hardver/jatekvezerlo/", "Játékvezérlő, szimulátor": "hardver/jatekvezerlo/",
@ -91,3 +93,7 @@ def findCategories(url= "https://hardverapro.hu/aprok/hardver/index.html"):
href = a_tag['href'] if a_tag and a_tag.has_attr('href') else 'No link' href = a_tag['href'] if a_tag and a_tag.has_attr('href') else 'No link'
href = href.rsplit('/',1)[0][6:] + '/' href = href.rsplit('/',1)[0][6:] + '/'
print(f"\"{name}\": \"{href}\",") print(f"\"{name}\": \"{href}\",")
# Declare this module's public API: only these names are exported by a
# star-import of the module.
__all__ = ['categories', 'search', 'fetch']