-
Notifications
You must be signed in to change notification settings - Fork 535
Description
I'm trying to scrape product information from Temu (https://www.temu.com/).
Processing https://www.temu.com/vn-en/2--car--------universal--sun----pvc---accessories-----g-601099650626830.html ...
Extracted Data:
Title: No title found.
`import sys
import asyncio
import time
from PyQt6.QtWidgets import QApplication, QMainWindow, QVBoxLayout, QWidget, QPushButton, QTextEdit, QLineEdit, QLabel
from PyQt6.QtCore import QThread, pyqtSignal
from crawlee.playwright_crawler import PlaywrightCrawler, PlaywrightCrawlingContext
class CrawlerThread(QThread):
    """Run a one-request Playwright crawl inside a QThread.

    Progress messages, the extracted title and a runtime summary are
    reported through the three signals declared below.
    """

    # Free-form progress and error messages.
    log_signal = pyqtSignal(str)
    # Extracted page data, already formatted for display.
    data_signal = pyqtSignal(str)
    # Multi-line statistics summary, emitted once after the crawl ends.
    runtime_signal = pyqtSignal(str)

    # CSS selector of the element that holds the title.
    # NOTE(review): the class name looks build-generated and may change
    # between site releases -- confirm it against the live page.
    TITLE_SELECTOR = '._2rn4tqXP'
    # Upper bound, in seconds, on waiting for the title element to appear.
    TITLE_WAIT_SECONDS = 15.0
    # Pause, in seconds, between two presence checks.
    TITLE_POLL_INTERVAL = 0.25

    def __init__(self, url):
        """Store the URL to crawl and reset the per-run counters."""
        super().__init__()
        self.url = url
        self.request_count = 0
        self.failed_requests = 0
        # Sum of the handler durations of successfully processed requests.
        self.total_duration = 0

    async def _extract_title(self, page):
        """Return the text of the first title element, or a placeholder.

        ``Locator.count()`` reports the current number of matches without
        waiting, so a single check misses an element that is inserted
        after navigation finishes. Poll until the element shows up or the
        deadline passes.
        """
        matches = page.locator(self.TITLE_SELECTOR)
        deadline = time.monotonic() + self.TITLE_WAIT_SECONDS
        while await matches.count() == 0:
            if time.monotonic() >= deadline:
                return "No title found."
            await asyncio.sleep(self.TITLE_POLL_INTERVAL)
        # Narrow to one element: inner_text() on a locator that matches
        # several elements raises a strict-mode error.
        return await matches.first.inner_text()

    async def run_crawler(self):
        """Crawl ``self.url`` and emit the title and runtime statistics."""
        crawler = PlaywrightCrawler(max_requests_per_crawl=1)

        @crawler.router.default_handler
        async def request_handler(context: PlaywrightCrawlingContext):
            self.log_signal.emit(f"Processing {context.request.url} ...")
            handler_started = time.monotonic()
            try:
                # Extract title from the element with class '_2rn4tqXP'.
                title = await self._extract_title(context.page)
            except Exception as e:
                # Boundary of the request handler: count and report the
                # failure instead of letting it propagate into the crawler.
                self.failed_requests += 1
                self.log_signal.emit(f"Error: {e}")
            else:
                self.request_count += 1
                self.total_duration += time.monotonic() - handler_started
                self.data_signal.emit(f"Title: {title}\n")

        # Time the whole crawl so "Total Runtime" covers browser start-up
        # and navigation, not only the time spent inside the handler.
        crawl_started = time.monotonic()
        await crawler.run([self.url])
        total_runtime = time.monotonic() - crawl_started

        # Calculate and emit runtime statistics.
        if self.request_count > 0:
            average_duration = self.total_duration / self.request_count
        else:
            average_duration = 0
        runtime_stats = (
            f"Requests Finished: {self.request_count}\n"
            f"Requests Failed: {self.failed_requests}\n"
            f"Average Request Duration: {average_duration:.2f} seconds\n"
            f"Total Runtime: {total_runtime:.2f} seconds"
        )
        self.runtime_signal.emit(runtime_stats)

    def run(self):
        """Thread entry point: drive ``run_crawler`` on a new event loop."""
        try:
            asyncio.run(self.run_crawler())
        except Exception as e:
            # Nothing above this frame reports errors to the user, so send
            # the failure through the log signal instead of losing it.
            self.log_signal.emit(f"Error: {e}")
class MainWindow(QMainWindow):
    """Main window with a URL field, a start button, a log and a stats label."""

    def __init__(self):
        """Build the widgets, lay them out and wire the start button."""
        # The constructor must be spelled __init__; a method named "init"
        # is never called when the window is instantiated.
        super().__init__()
        self.setWindowTitle("Web Data Crawler")
        self.setGeometry(100, 100, 800, 600)

        # Holds the worker thread of the most recent crawl, if any.
        self.crawler_thread = None

        # Widgets
        self.url_input = QLineEdit()
        self.url_input.setPlaceholderText("Enter URL here")
        self.start_button = QPushButton("Start Crawling")
        self.output_area = QTextEdit()
        self.output_area.setReadOnly(True)
        self.runtime_label = QLabel("Runtime Statistics:")

        # Layout
        layout = QVBoxLayout()
        layout.addWidget(self.url_input)
        layout.addWidget(self.start_button)
        layout.addWidget(self.output_area)
        layout.addWidget(self.runtime_label)
        container = QWidget()
        container.setLayout(layout)
        self.setCentralWidget(container)

        # Connections
        self.start_button.clicked.connect(self.start_crawling)

    def start_crawling(self):
        """Validate the URL field and start a crawl on a worker thread."""
        url = self.url_input.text().strip()
        if not url:
            self.output_area.setText("Please enter a valid URL.")
            return
        self.output_area.clear()

        # Block further clicks until this crawl ends: a second click would
        # rebind self.crawler_thread and drop the only reference to a
        # thread that is still running.
        self.start_button.setEnabled(False)

        # Run the crawler in a separate thread
        self.crawler_thread = CrawlerThread(url)
        self.crawler_thread.log_signal.connect(self.update_output)
        self.crawler_thread.data_signal.connect(self.display_data)
        self.crawler_thread.runtime_signal.connect(self.display_runtime)
        self.crawler_thread.finished.connect(self._on_crawl_finished)
        self.crawler_thread.start()

    def _on_crawl_finished(self):
        """Re-enable the start button once the worker thread has finished."""
        self.start_button.setEnabled(True)

    def update_output(self, text):
        """Append a log line to the output area."""
        self.output_area.append(text)

    def display_data(self, data):
        """Append extracted data to the output area under a heading."""
        self.output_area.append("Extracted Data:\n" + data)

    def display_runtime(self, runtime):
        """Show the runtime statistics text in the statistics label."""
        self.runtime_label.setText("Runtime Statistics:\n" + runtime)
# Create the Qt application, show the main window, then run the event loop;
# the value returned by exec() is used as the process exit status.
app = QApplication(sys.argv)
window = MainWindow()
window.show()
exit_code = app.exec()
sys.exit(exit_code)
`
I've tried, but every attempt finds nothing. Can someone help me?
