From 27fcbc379bd21c25acf13754597b884c16e53e78 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bal=C3=A1zs=20Toldi?= Date: Wed, 16 Jul 2025 15:00:48 +0200 Subject: [PATCH] Refactor --- main.py | 6 ------ pyproject.toml | 5 +++++ run.py | 16 ++++++++++------ src/scraper/__init__.py | 4 ++++ scraper.py => src/scraper/scraper.py | 8 +++++++- 5 files changed, 26 insertions(+), 13 deletions(-) delete mode 100644 main.py create mode 100644 src/scraper/__init__.py rename scraper.py => src/scraper/scraper.py (92%) diff --git a/main.py b/main.py deleted file mode 100644 index 95a905c..0000000 --- a/main.py +++ /dev/null @@ -1,6 +0,0 @@ -def main(): - print("Hello from hardverapro-mcp!") - - -if __name__ == "__main__": - main() diff --git a/pyproject.toml b/pyproject.toml index 3fad2c9..3712d93 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,3 +9,8 @@ dependencies = [ "gradio[mcp]>=5.34.2", "requests>=2.32.4", ] +[tool.setuptools.package-dir] +"scraper" = "src/scraper" + +[project.scripts] +run-scraper = "run:main" \ No newline at end of file diff --git a/run.py b/run.py index 5746810..181c3ca 100644 --- a/run.py +++ b/run.py @@ -1,4 +1,10 @@ -import scraper +import sys +from pathlib import Path + +# Add the src directory to Python path +sys.path.append(str(Path(__file__).parent / "src")) + +from scraper import categories, search, fetch import gradio as gr def hardverapro_search(query: str,offset: int = 0, category: str = "All")-> list: @@ -16,7 +22,7 @@ def hardverapro_search(query: str,offset: int = 0, category: str = "All")-> list - 'price' (str): The price listed - 'link' (str): A full URL to the listing """ - raw = scraper.search(query,offset=offset,category=category) + raw = search(query,offset=offset,category=category) return [[r["title"], r["price"], r["link"]] for r in raw] def hardverapro_fetch(url: str) -> dict: @@ -32,7 +38,7 @@ def hardverapro_fetch(url: str) -> dict: - 'description' (str): The description of the listing - 'img' (str): A full URL to the image """ - return scraper.fetch(url) + return fetch(url) @@ -41,7 +47,7 @@ iface_search = gr.Interface( inputs=[ gr.Textbox(label="Search query"), gr.Number(label="Offset (e.g. 100, 200)", value=0), - gr.Dropdown(choices=scraper.categories, value="All"), + gr.Dropdown(choices=categories, value="All"), ], outputs=gr.Dataframe(headers=["title", "price", "link"], type="array"), title="HardverApró Search" @@ -61,8 +67,6 @@ tabbed_interface = gr.TabbedInterface( def main(): - #scraper.fetch("https://hardverapro.hu/apro/sff_2_5_10k_15k_sas_hdd-k_1_2tb-ig/friss.html") - #iface.launch(mcp_server=True) tabbed_interface.launch(mcp_server=True) if __name__ == "__main__": diff --git a/src/scraper/__init__.py b/src/scraper/__init__.py new file mode 100644 index 0000000..48085c4 --- /dev/null +++ b/src/scraper/__init__.py @@ -0,0 +1,4 @@ +__version__ = "1.0.0" +from .scraper import categories, search, fetch + +__all__ = ["fetch", "search", "categories"] \ No newline at end of file diff --git a/scraper.py b/src/scraper/scraper.py similarity index 92% rename from scraper.py rename to src/scraper/scraper.py index 515956b..b58efb4 100644 --- a/scraper.py +++ b/src/scraper/scraper.py @@ -7,6 +7,7 @@ SEARCH_URL="keres.php?stext=" categoriesMap = { "All": "", + "Hardver": "hardver", "Alaplap": "hardver/alaplap/", "Processzor": "hardver/processzor/", "Memória": "hardver/memoria/", @@ -18,6 +19,7 @@ categoriesMap = { "Szerver SSD, HDD": "hardver/merevlemez_ssd/szerver_hdd_ssd/", "Adathordozó": "hardver/adathordozo/", "Hálózati termékek": "hardver/halozati_termekek/", + "Switch, HUB": "hardver/halozati_termekek/router_switch_repeater/switch_hub/", "3D nyomtatás": "hardver/3d_nyomtatas/", "Nyomtató, szkenner": "hardver/nyomtato_szkenner/", "Játékvezérlő, szimulátor": "hardver/jatekvezerlo/", @@ -90,4 +92,8 @@ def findCategories(url= "https://hardverapro.hu/aprok/hardver/index.html"): name = a_tag.text.strip() href = a_tag['href'] if a_tag and a_tag.has_attr('href') else 'No link' href = href.rsplit('/',1)[0][6:] + '/' - print(f"\"{name}\": \"{href}\",") \ No newline at end of file + print(f"\"{name}\": \"{href}\",") + + +# Export it explicitly +__all__ = ['categories', 'search', 'fetch'] # optional, but good practice \ No newline at end of file