Refactor
This commit is contained in:
parent
6e0902bcbc
commit
27fcbc379b
5 changed files with 26 additions and 13 deletions
6
main.py
6
main.py
|
@ -1,6 +0,0 @@
|
||||||
def main():
|
|
||||||
print("Hello from hardverapro-mcp!")
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
|
@ -9,3 +9,8 @@ dependencies = [
|
||||||
"gradio[mcp]>=5.34.2",
|
"gradio[mcp]>=5.34.2",
|
||||||
"requests>=2.32.4",
|
"requests>=2.32.4",
|
||||||
]
|
]
|
||||||
|
[tool.setuptools.package-dir]
|
||||||
|
"scraper" = "src/scraper"
|
||||||
|
|
||||||
|
[project.scripts]
|
||||||
|
run-scraper = "run:main"
|
16
run.py
16
run.py
|
@ -1,4 +1,10 @@
|
||||||
import scraper
|
import sys
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
# Add the local src/ directory to the Python import path so the scraper package can be imported
|
||||||
|
sys.path.append(str(Path(__file__).parent / "src"))
|
||||||
|
|
||||||
|
from scraper import categories, search, fetch
|
||||||
import gradio as gr
|
import gradio as gr
|
||||||
|
|
||||||
def hardverapro_search(query: str,offset: int = 0, category: str = "All")-> list:
|
def hardverapro_search(query: str,offset: int = 0, category: str = "All")-> list:
|
||||||
|
@ -16,7 +22,7 @@ def hardverapro_search(query: str,offset: int = 0, category: str = "All")-> list
|
||||||
- 'price' (str): The price listed
|
- 'price' (str): The price listed
|
||||||
- 'link' (str): A full URL to the listing
|
- 'link' (str): A full URL to the listing
|
||||||
"""
|
"""
|
||||||
raw = scraper.search(query,offset=offset,category=category)
|
raw = search(query,offset=offset,category=category)
|
||||||
return [[r["title"], r["price"], r["link"]] for r in raw]
|
return [[r["title"], r["price"], r["link"]] for r in raw]
|
||||||
|
|
||||||
def hardverapro_fetch(url: str) -> dict:
|
def hardverapro_fetch(url: str) -> dict:
|
||||||
|
@ -32,7 +38,7 @@ def hardverapro_fetch(url: str) -> dict:
|
||||||
- 'description' (str): The description of the listing
|
- 'description' (str): The description of the listing
|
||||||
- 'img' (str): A full URL to the image
|
- 'img' (str): A full URL to the image
|
||||||
"""
|
"""
|
||||||
return scraper.fetch(url)
|
return fetch(url)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -41,7 +47,7 @@ iface_search = gr.Interface(
|
||||||
inputs=[
|
inputs=[
|
||||||
gr.Textbox(label="Search query"),
|
gr.Textbox(label="Search query"),
|
||||||
gr.Number(label="Offset (e.g. 100, 200)", value=0),
|
gr.Number(label="Offset (e.g. 100, 200)", value=0),
|
||||||
gr.Dropdown(choices=scraper.categories, value="All"),
|
gr.Dropdown(choices=categories, value="All"),
|
||||||
],
|
],
|
||||||
outputs=gr.Dataframe(headers=["title", "price", "link"], type="array"),
|
outputs=gr.Dataframe(headers=["title", "price", "link"], type="array"),
|
||||||
title="HardverApró Search"
|
title="HardverApró Search"
|
||||||
|
@ -61,8 +67,6 @@ tabbed_interface = gr.TabbedInterface(
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
#scraper.fetch("https://hardverapro.hu/apro/sff_2_5_10k_15k_sas_hdd-k_1_2tb-ig/friss.html")
|
|
||||||
#iface.launch(mcp_server=True)
|
|
||||||
tabbed_interface.launch(mcp_server=True)
|
tabbed_interface.launch(mcp_server=True)
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
4
src/scraper/__init__.py
Normal file
4
src/scraper/__init__.py
Normal file
|
@ -0,0 +1,4 @@
|
||||||
|
__version__ = "1.0.0"
|
||||||
|
from .scraper import categories, search, fetch
|
||||||
|
|
||||||
|
__all__ = ["fetch", "search", "categories"]
|
|
@ -7,6 +7,7 @@ SEARCH_URL="keres.php?stext="
|
||||||
|
|
||||||
categoriesMap = {
|
categoriesMap = {
|
||||||
"All": "",
|
"All": "",
|
||||||
|
"Hardver": "hardver",
|
||||||
"Alaplap": "hardver/alaplap/",
|
"Alaplap": "hardver/alaplap/",
|
||||||
"Processzor": "hardver/processzor/",
|
"Processzor": "hardver/processzor/",
|
||||||
"Memória": "hardver/memoria/",
|
"Memória": "hardver/memoria/",
|
||||||
|
@ -18,6 +19,7 @@ categoriesMap = {
|
||||||
"Szerver SSD, HDD": "hardver/merevlemez_ssd/szerver_hdd_ssd/",
|
"Szerver SSD, HDD": "hardver/merevlemez_ssd/szerver_hdd_ssd/",
|
||||||
"Adathordozó": "hardver/adathordozo/",
|
"Adathordozó": "hardver/adathordozo/",
|
||||||
"Hálózati termékek": "hardver/halozati_termekek/",
|
"Hálózati termékek": "hardver/halozati_termekek/",
|
||||||
|
"Switch, HUB": "hardver/halozati_termekek/router_switch_repeater/switch_hub/",
|
||||||
"3D nyomtatás": "hardver/3d_nyomtatas/",
|
"3D nyomtatás": "hardver/3d_nyomtatas/",
|
||||||
"Nyomtató, szkenner": "hardver/nyomtato_szkenner/",
|
"Nyomtató, szkenner": "hardver/nyomtato_szkenner/",
|
||||||
"Játékvezérlő, szimulátor": "hardver/jatekvezerlo/",
|
"Játékvezérlő, szimulátor": "hardver/jatekvezerlo/",
|
||||||
|
@ -91,3 +93,7 @@ def findCategories(url= "https://hardverapro.hu/aprok/hardver/index.html"):
|
||||||
href = a_tag['href'] if a_tag and a_tag.has_attr('href') else 'No link'
|
href = a_tag['href'] if a_tag and a_tag.has_attr('href') else 'No link'
|
||||||
href = href.rsplit('/',1)[0][6:] + '/'
|
href = href.rsplit('/',1)[0][6:] + '/'
|
||||||
print(f"\"{name}\": \"{href}\",")
|
print(f"\"{name}\": \"{href}\",")
|
||||||
|
|
||||||
|
|
||||||
|
# Explicitly declare the public names exported by this module
|
||||||
|
__all__ = ['categories', 'search', 'fetch'] # optional, but good practice
|
Loading…
Reference in a new issue