
The parser was working, but it suddenly started throwing an error.

My code:

def get_third_url(self):
    log.info('Parsim')
    text = self.session.get('https://u.to/ofd6Gw').text
    soup = BeautifulSoup(text, 'lxml')
    # Total page count is taken from the second-to-last pagination link
    pagination = int(soup.select('div.page-navigation a')[-2].text)
    for page in range(1, pagination + 1):
        log.info(f'Page {page}/{pagination}')
        if page != 1:
            text = self.session.get('https://u.to/ofd6Gw',
                                    params={'PAGEN_2': page}).text
            soup = BeautifulSoup(text, 'lxml')
        # Collect product links from the listing page
        urls = list(map(lambda x: 'https://u.to/v1qrGw' + x.get('href'),
                        soup.select('div.bordered.product_name a')))
        for url in urls:
            text = self.session.get(url).text
            soup = BeautifulSoup(text, 'lxml')
            # Breadcrumb items, without the first two and the last two entries
            category = list(map(lambda x: x.text,
                                soup.select('div.bx-breadcrumb-item span')[2:-2]))
            # Skip products whose category is in the exclusion list
            if all(x.replace('/', '').strip() not in setting.not_parsing_category
                   for x in category):
                category = ''.join(category)
                name = soup.select_one('div.name').text
                meta = soup.select('div.bx_rt meta')[0].get('content')
                articul = (meta.replace(name, '').replace('NORDBERG', '')
                           .replace('Nordberg', '').split('(')[0].strip())
                if articul == '':
                    continue
                brand = 'Nordberg'
                # Characteristics table: drop empty cells and non-breaking spaces
                characteristics = list(filter(
                    lambda x: x != '' and x != u'\xa0',
                    list(map(lambda x: x.text,
                             soup.select('div.properties-tth table td')))))
                # Pair up name/value cells as "name:value"
                characteristics = list(
                    characteristics[i] + ':' + characteristics[i + 1]
                    for i in range(0, len(characteristics), 2))
                info = soup.select_one('div.tab.tab-description.current')
                info = info.text.split('\n') if info is not None else []
                # Build a short description of at most 350 characters
                short_info = []
                len_ = 0
                for i in range(len(info)):
                    if len_ + len(info[i]) <= 350:
                        len_ += len(info[i])
                        if i == len(info) - 1:
                            short_info = info[::]
                    else:
                        short_info = info[:i]
                        break
                info.extend(characteristics)
                short_info = '<div class="offers_info_list">' + ' '.join(
                    list(map(lambda x: '<p>' + x.strip() + '</p>', short_info))) + '</div>'
                info = '<div class="offers_info_card">' + ' '.join(
                    list(map(lambda x: '<p>' + x.strip() + '</p>', info))) + '</div>'
                # Up to four images, extracted from the inline background-image style
                images = list(
                    map(lambda x: 'https://u.to/v1qrGw' + x.get('style').replace(
                        "background-image: url('", '').replace("');", ''),
                        soup.select('span.cnt_item')[:4]))
                if images:
                    main_image = images[0]
                    dop_images = '|'.join(images[1:])
                else:
                    main_image = ''
                    dop_images = ''
                # Price: drop the currency suffix and remove space separators
                price = ''.join(soup.select_one('span.price-label')
                                .text.split(' ')[:-1]).strip().replace(' ', '')
                if price != '':
                    price = int(price)
                else:
                    continue
                self.result.append(
                    [category, name, brand, articul, short_info, info,
                     main_image, dop_images, price, 'nordberg'])
    log.info('SUCCESSFUL')

Please tell me where I'm making a mistake. Everything used to work, and it still works up to a point, but it stops on pages 28-29.

Thank you!

Well, the traceback tells you which line the error is on. Look at what can be None on that line: some element is probably not found. Save that page and analyze what is wrong with it.

CrazyElf, 2021-10-13 08:00:56
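
A minimal sketch of the kind of check CrazyElf suggests, assuming the crash happens on a product page where select_one returns None (for example, div.name or span.price-label is missing). The parse_product and dump_failed_page helpers are hypothetical, not part of the original code; they only illustrate guarding against None and saving the offending page for later analysis:

from pathlib import Path

def parse_product(soup, url):
    # Guard against missing elements instead of calling .text on None
    name_tag = soup.select_one('div.name')
    price_tag = soup.select_one('span.price-label')
    if name_tag is None or price_tag is None:
        # Save the raw HTML so the broken page can be inspected by hand
        dump_failed_page(soup, url)
        return None
    return name_tag.text, price_tag.text

def dump_failed_page(soup, url, folder='failed_pages'):
    # Hypothetical helper: write the page to disk for manual analysis
    Path(folder).mkdir(exist_ok=True)
    fname = url.rstrip('/').rsplit('/', 1)[-1] or 'page'
    (Path(folder) / f'{fname}.html').write_text(str(soup), encoding='utf-8')
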

I don't see the line. If you mean line 206, it is the same on all pages: there is a Yandex script there and nothing else.

Nicolo, 2021-10-13 11:30:37