Home>

I need help adding to or changing my code so that it works correctly

There is a site that loads some of its data via JSON. My code originally just parsed the HTML page, but then I noticed that the data did not match; it turned out the data is loaded via JSON.

Site Link https://www.mechta.kz/section/smartfony/

Here is a link to the JSON, but it only exposes one page of results

From here I need to take OldPrice and Price

The code itself is an application that scrapes various sites

This code collects the data and exports it

from PyQt5.Qt import *
from pickle import TRUE
import requests
from bs4 import BeautifulSoup
class ThreadM(QThread):
    """Background worker that scrapes product cards from a paginated listing.

    Signals:
        stepChanged(int, int): emitted before each page is fetched with
            ``(page, pages_count)``.
        finished(list): emitted once with the list of product dicts
            (keys ``'title'``, ``'price'``, ``'old price'``) after every page
            was fetched successfully.
        error(str): emitted with a human-readable message when a request
            fails or returns a non-200 status; scraping stops afterwards.

    NOTE(review): ``finished`` redefines the signal name that ``QThread``
    already provides; kept because callers connect to it expecting a list.
    """

    stepChanged = pyqtSignal(int, int)
    finished = pyqtSignal(list)
    error = pyqtSignal(str)

    # Seconds to wait for the server before giving up; without a timeout
    # ``requests`` may block the worker thread indefinitely.
    REQUEST_TIMEOUT = 15

    def __init__(self, url, file, HEADERS):
        """Store the listing URL, the output file name and the HTTP headers.

        ``file`` is only stored here; this class never reads it.
        """
        super().__init__()
        self.url = url
        self.file = file
        self.HEADERS = HEADERS

    def run(self):
        """Thread entry point: run the scraper."""
        self.parseM()

    def parseM(self):
        """Fetch every listing page and emit the collected products.

        The first response is used to read the page count; each page is then
        requested with ``?page=N``. Any failed request aborts the run after
        an ``error`` signal, so ``finished`` is only emitted for a complete
        result.
        """
        first = self._fetch_page()
        if first is None:
            return

        products = []
        pages_count = self.get_pages_count(first.text)
        for page in range(1, pages_count + 1):
            self.stepChanged.emit(page, pages_count)
            response = self._fetch_page(params={'page': page})
            if response is None:
                # The error signal was already emitted; do not touch
                # ``response.text`` on a failed request.
                return
            products.extend(self.get_content(response.text))
            # Short pause between requests.
            self.msleep(50)
        self.finished.emit(products)

    def _fetch_page(self, params=None):
        """Return a 200 response, or ``None`` after emitting ``error``."""
        response = self.get_html(params=params)
        if response is False:
            # get_html() has already reported the failure.
            return None
        if response.status_code != 200:
            self.error.emit(f'Error: status_code={response.status_code}')
            return None
        return response

    def get_html(self, params=None):
        """GET ``self.url`` with the stored headers.

        Returns:
            The ``requests`` response object, or ``False`` if the request
            itself failed (in which case ``error`` has been emitted).
        """
        try:
            return requests.get(
                self.url,
                headers=self.HEADERS,
                params=params,
                timeout=self.REQUEST_TIMEOUT,
            )
        except requests.RequestException:
            self.error.emit('Error: Something went wrong.')
            return False

    def get_pages_count(self, html):
        """Return the page count read from the last ``span.block`` element.

        Falls back to ``1`` when there is no such element or its text is not
        a number.
        """
        soup = BeautifulSoup(html, 'html.parser')
        pagination = soup.select('span.block')
        if not pagination:
            return 1
        label = pagination[-1].get_text(strip=True)
        try:
            return int(label)
        except ValueError:
            return 1

    def get_content(self, html):
        """Extract one dict per ``div.hoverCard`` element of the page.

        Cards without a title element are skipped. A card without a price
        element is reported as ``'Out of stock'``.
        """
        soup = BeautifulSoup(html, 'html.parser')
        products = []
        for item in soup.find_all('div', class_='hoverCard'):
            title_node = item.find('div', class_='ellipsis')
            if title_node is None:
                continue

            price_node = item.find('div', class_='text-ts1')
            if price_node:
                price = price_node.get_text(strip=True).replace(',', ' ')
            else:
                price = 'Out of stock'

            products.append({
                'title': title_node.get_text(strip=True),
                'price': price,
                # This parser does not extract an old price from the card,
                # so the column is always left empty.
                'old price': '',
            })
        return products

And here is the application itself; it is most likely not needed, but I am including it anyway

import sys
import os
import os.path
import csv
import tkinter as tk
from tkinter import ttk
from PyQt5 import QtCore, QtGui, QtWidgets
from PyQt5.Qt import *
from Sites.Sulpak import ThreadS
from Sites.Mechta import ThreadM
from pickle import TRUE
class Ui_Form(object):
    """Widget layout for the scraper window.

    Creates a start button, two line edits (URL and file name), a combo box
    with two entries and a text box used as a log, all positioned with fixed
    geometry on a 700x700 form.
    """

    def setupUi(self, Form):
        """Create and style all child widgets on ``Form``."""
        Form.setObjectName("Form")
        Form.resize(700, 700)
        font = QtGui.QFont()
        font.setPointSize(1)
        Form.setFont(font)
        Form.setLayoutDirection(QtCore.Qt.LeftToRight)
        Form.setStyleSheet("background: rgb(112, 112, 112);")

        # Start button.
        self.pushButton = QtWidgets.QPushButton(Form)
        self.pushButton.setGeometry(QtCore.QRect(200, 400, 300, 61))
        font = QtGui.QFont()
        font.setPointSize(1)
        font.setBold(True)
        font.setWeight(75)
        self.pushButton.setFont(font)
        self.pushButton.setStyleSheet(
            " QPushButton{\n"
            "\n"
            "background: rgb(61,181,233);\n"
            "height: 50px;\n"
            "border-radius: 10px;\n"
            "text-align: center;\n"
            "font-size: 20px;\n"
            "font-weight: bold;\n"
            "}\n"
            "\n"
            "QPushButton:hover{\n"
            "background: rgb(52, 148, 189)\n"
            "}"
        )
        self.pushButton.setObjectName("pushButton")

        # URL input.
        self.lineEdit = QtWidgets.QLineEdit(Form)
        self.lineEdit.setGeometry(QtCore.QRect(180, 320, 340, 50))
        self.lineEdit.setStyleSheet(
            "QLineEdit{\n"
            "border-radius: 10px;\n"
            "font-size: 20px;\n"
            "font-weight: bold;\n"
            "background: white;\n"
            "}\n"
            "\n"
            "QLineEdit:hover {\n"
            "border: 3px solid rgb(61,181,233);\n"
            "}"
        )
        self.lineEdit.setInputMask("")
        self.lineEdit.setAlignment(QtCore.Qt.AlignCenter)
        self.lineEdit.setObjectName("lineEdit")

        # File-name input.
        self.lineEdit_2 = QtWidgets.QLineEdit(Form)
        self.lineEdit_2.setGeometry(QtCore.QRect(180, 240, 340, 50))
        self.lineEdit_2.setStyleSheet(
            "QLineEdit{\n"
            "border-radius: 10px;\n"
            "font-size: 20px;\n"
            "font-weight: bold;\n"
            "background: white;\n"
            "}\n"
            "\n"
            "QLineEdit:hover {\n"
            "border: 3px solid rgb(61,181,233);\n"
            "}"
        )
        self.lineEdit_2.setInputMask("")
        self.lineEdit_2.setAlignment(QtCore.Qt.AlignCenter)
        self.lineEdit_2.setObjectName("lineEdit_2")

        # Site selector.
        self.comboBox = QtWidgets.QComboBox(Form)
        self.comboBox.setGeometry(QtCore.QRect(175, 130, 350, 60))
        font = QtGui.QFont()
        font.setPointSize(1)
        font.setBold(True)
        font.setWeight(75)
        self.comboBox.setFont(font)
        self.comboBox.setMouseTracking(False)
        self.comboBox.setLayoutDirection(QtCore.Qt.LeftToRight)
        self.comboBox.setAutoFillBackground(False)
        self.comboBox.setStyleSheet(
            "QComboBox{\n"
            "border-radius: 30px;\n"
            "padding-left: 140px;\n"
            "background:rgb(56, 56, 56);\n"
            "border-bottom: 5px solid rgb(89, 133, 255);\n"
            "font-size: 20px;\n"
            "color: #fff;\n"
            "font-weight: bold;\n"
            "}\n"
            "\n"
            "QComboBox QAbstractItemView {\n"
            "text-align: center;\n"
            "border-radius: 20px;\n"
            "background-color:rgb(56, 56, 56);\n"
            "color: white;\n"
            "}\n"
            "\n"
            "QComboBox::drop-down {\n"
            "width: 25px;\n"
            "height: 25px;\n"
            "top: 15px;\n"
            "right: 15px;\n"
            "}"
        )
        self.comboBox.setInputMethodHints(QtCore.Qt.ImhNone)
        self.comboBox.setObjectName("comboBox")
        # Two placeholder entries; their text is set in retranslateUi().
        self.comboBox.addItem("")
        self.comboBox.addItem("")

        # Log output box.
        self.widget = QtWidgets.QTextEdit(Form)
        self.widget.setGeometry(QtCore.QRect(150, 499, 400, 151))
        font = QtGui.QFont()
        font.setPointSize(12)
        self.widget.setFont(font)
        self.widget.setStyleSheet(
            "background: #fff;\n"
            "color: black;"
        )
        self.widget.setObjectName("widget")

        self.retranslateUi(Form)
        QtCore.QMetaObject.connectSlotsByName(Form)

    def retranslateUi(self, Form):
        """Set the window title and all user-visible widget texts."""
        _translate = QtCore.QCoreApplication.translate
        Form.setWindowTitle(_translate("Form", "Form"))
        self.pushButton.setText(_translate("Form", "Start"))
        self.lineEdit.setPlaceholderText(_translate("Form", "Enter Directory URL"))
        self.lineEdit_2.setPlaceholderText(_translate("Form", "Enter file name"))
        self.comboBox.setCurrentText(_translate("Form", "Sulpak"))
        self.comboBox.setItemText(0, _translate("Form", "Sulpak"))
        self.comboBox.setItemText(1, _translate("Form", "Dream"))
class MainWindow(QtWidgets.QWidget, Ui_Form):
    """Main window: starts a scraper thread and writes its result to CSV.

    The combo box index selects the scraper (0 -> ``ThreadS``,
    1 -> ``ThreadM``) and the output directory used by ``save_file``.
    """

    # HTTP headers handed to every scraper thread.
    HEADERS = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.99 Safari/537.36',
        'accept': '*/*'
    }

    def __init__(self):
        """Build the UI, wire the signals and pre-fill the file name field."""
        super().__init__()
        self.setupUi(self)
        self.pushButton.clicked.connect(self.btn)
        self.comboBox.currentTextChanged.connect(self.on_combobox_changed)
        self.lineEdit_2.setText('.csv')
        # Keeps a reference to every started thread.
        self.threads = []
        self.combo_status = 0

    def on_combobox_changed(self, index):
        """Remember the selected combo box index.

        The signal argument is ignored; the current index is read from the
        combo box itself. Only the two known indexes (0 and 1) are stored.
        """
        current = self.comboBox.currentIndex()
        if current in (0, 1):
            self.combo_status = current

    def btn(self):
        """Validate the inputs and start the scraper for the selected site."""
        if not self.lineEdit.text() or not self.lineEdit_2.text():
            QMessageBox.information(self, 'Attention', 'Fill in the input fields.')
            return
        # Disabled until the thread reports a result or an error.
        self.pushButton.setEnabled(False)
        self.url = self.lineEdit.text()
        self.file = self.lineEdit_2.text()
        # NOTE(review): the attribute name ``thread`` hides QObject.thread()
        # on this instance; kept unchanged for compatibility.
        if self.combo_status == 0:
            self.thread = ThreadS(self.url, self.file, self.HEADERS)
        elif self.combo_status == 1:
            self.thread = ThreadM(self.url, self.file, self.HEADERS)
        self.threads.append(self.thread)
        self.thread.stepChanged.connect(self.onStepChanged)
        self.thread.finished.connect(self.save_file)
        self.thread.error.connect(self.error)
        self.thread.start()

    def error(self, error):
        """Log the message, show it in a dialog and re-enable the button."""
        self.widget.append(error)
        QMessageBox.information(self, 'Error', error)
        self.pushButton.setEnabled(True)

    def onStepChanged(self, page, pages_count):
        """Append a progress line for the page being parsed."""
        self.widget.append(f'Parse page {page} from {pages_count}...')

    def save_file(self, items):
        """Write ``items`` to a ';'-delimited CSV in the site's directory.

        Each item must provide the keys ``'title'``, ``'price'`` and
        ``'old price'``. The target directory is created when missing; an
        I/O failure is reported through ``error`` so the start button is
        re-enabled instead of staying disabled.
        """
        # NOTE(review): the two directory names are spelled differently
        # ("Sparged" vs "Sparred"); left as-is, confirm the intended name.
        if self.combo_status == 0:
            self.combobox_name = "Sparged Data/Sulpak"
        elif self.combo_status == 1:
            self.combobox_name = "Sparred Data/Mechta"
        file_s = self.combobox_name + '/' + self.file
        try:
            os.makedirs(self.combobox_name, exist_ok=True)
            with open(file_s, 'w', newline='') as file:
                writer = csv.writer(file, delimiter=';')
                writer.writerow(['Model', 'Price', 'Price without discount'])
                writer.writerows(
                    [item['title'], item['price'], item['old price']]
                    for item in items
                )
        except OSError as exc:
            self.error(f'Error: could not save file: {exc}')
            return
        self.widget.append(f'{len(items)} items received')
        self.pushButton.setEnabled(True)
if __name__ == "__main__":
    # Script entry point: create the Qt application, show the main window
    # and exit the process with the event loop's return code.
    application = QApplication(sys.argv)
    window = MainWindow()
    window.show()
    exit_code = application.exec_()
    sys.exit(exit_code)

I need more specific help; I am struggling to work this out on my own

  • Answer # 1
    import requests

    # The query string is assembled from adjacent string literals so the
    # long URL stays one valid Python expression. categoryPaths is a
    # percent-encoded category path and must not contain spaces.
    url = (
        'https://api.retailrocket.net/api/2.0/recommendation/popular/6126195797a528238858a1e2/?'
        '&stockId=al'
        '&categoryIds='
        '&categoryPaths='
        '%D0%98%D0%BD%D1%82%D0%B5%D1%80%D0%BD%D0%B5%D1%82-%D0%BC%D0%B0%D0%B3%D0%B0%D0%B7%D0%B8%D0%BD'
        '%2F%D0%A1%D0%BC%D0%B0%D1%80%D1%82%D1%84%D0%BE%D0%BD%D1%8B%20%D0%B8%20%D0%B3%D0%B0%D0%B4%D0%B6%D0%B5%D1%82%D1%8B'
        '%2F%D0%A1%D0%BC%D0%B0%D1%80%D1%82%D1%84%D0%BE%D0%BD%D1%8B'
        '&format=json'
    )
    response = requests.get(url, timeout=10)
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    response.raise_for_status()
    titles = response.json()
    for title in titles:
        print(title['Name'], title['OldPrice'], title['Price'])
    
  • Answer # 2
    import requests

    # The query string is assembled from adjacent string literals so the
    # long URL stays one valid Python expression. categoryPaths is a
    # percent-encoded category path and must not contain spaces.
    url = (
        'https://api.retailrocket.net/api/2.0/recommendation/popular/6126195797a528238858a1e2/?'
        '&stockId=al'
        '&categoryIds='
        '&categoryPaths='
        '%D0%98%D0%BD%D1%82%D0%B5%D1%80%D0%BD%D0%B5%D1%82-%D0%BC%D0%B0%D0%B3%D0%B0%D0%B7%D0%B8%D0%BD'
        '%2F%D0%A1%D0%BC%D0%B0%D1%80%D1%82%D1%84%D0%BE%D0%BD%D1%8B%20%D0%B8%20%D0%B3%D0%B0%D0%B4%D0%B6%D0%B5%D1%82%D1%8B'
        '%2F%D0%A1%D0%BC%D0%B0%D1%80%D1%82%D1%84%D0%BE%D0%BD%D1%8B'
        '&format=json'
    )
    response = requests.get(url, timeout=10)
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    response.raise_for_status()
    titles = response.json()
    for title in titles:
        print(title['Name'], title['OldPrice'], title['Price'])