
I'm new to Python. I'm using Selenium to get service-status information from the Tokyo Metro website, but it's slow, so I wanted to speed it up with multiprocessing. I wrote the code below by referring to various sites, but I get an error. I searched on my own based on the situation and the error text, but I couldn't find a solution. I'd be grateful if someone could help.

Code
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import time
from multiprocessing import Pool
import chromedriver_binary

def fetchclass(url):
    driver.get(url)
    time.sleep(3)
    html = driver.page_source
    soup = BeautifulSoup(html, 'lxml')
    text = soup.find(class_='v2_unkouReportInfo').text
    return print(text.strip())

# Start browser
options = Options()
options.add_argument('--headless')
driver = webdriver.Chrome(options=options)

urls = ['https://www.tokyometro.jp/unkou/history/ginza.html', 'https://www.tokyometro.jp/unkou/history/marunouchi.html',
        'https://www.tokyometro.jp/unkou/history/hibiya.html', 'https://www.tokyometro.jp/unkou/history/touzai.html',
        'https://www.tokyometro.jp/unkou/history/chiyoda.html', 'https://www.tokyometro.jp/unkou/history/yurakucho.html',
        'https://www.tokyometro.jp/unkou/history/hanzoumon.html', 'https://www.tokyometro.jp/unkou/history/nanboku.html']

if __name__ == "__main__":
    p = Pool(4)
    result = p.map(fetchclass, urls)
    print(result)

# Close browser
driver.quit()
Error text excerpt
multiprocessing.pool.RemoteTraceback:
ConnectionRefusedError: [WinError 10061] The connection could not be made because it was rejected by the target computer.
During handling of the above exception, another exception occurred:
urllib3.exceptions.NewConnectionError: <urllib3.connection.HTTPConnection object at 0x00000206A123B688>: Failed to establish a new connection: [WinError 10061] The connection could not be made because it was rejected by the target computer.
The above exception was the direct cause of the following exception:
  • Answer # 1

    Create the driver inside the worker function, so that each process launches (and quits) its own browser. A module-level driver does not work across multiprocessing's spawned processes, which is what the connection-refused error is about.

    from bs4 import BeautifulSoup
    from selenium import webdriver
    from selenium.webdriver.chrome.options import Options
    import time, sys
    from multiprocessing import Pool
    #import chromedriver_binary

    def fetchclass(url):
        # Each worker launches its own headless Chrome
        options = Options()
        options.add_argument('--headless')
        driver = webdriver.Chrome(options=options)
        driver.get(url)
        time.sleep(5)
        html = driver.page_source
        soup = BeautifulSoup(html, 'lxml')
        text = soup.find(class_='v2_unkouReportInfo').text
        # sys.stdout.buffer.write(text.encode('utf-8'))
        driver.quit()
        return text.strip()

    urls = ['https://www.tokyometro.jp/unkou/history/ginza.html', 'https://www.tokyometro.jp/unkou/history/marunouchi.html',
            'https://www.tokyometro.jp/unkou/history/hibiya.html', 'https://www.tokyometro.jp/unkou/history/touzai.html',
            'https://www.tokyometro.jp/unkou/history/chiyoda.html', 'https://www.tokyometro.jp/unkou/history/yurakucho.html',
            'https://www.tokyometro.jp/unkou/history/hanzoumon.html', 'https://www.tokyometro.jp/unkou/history/nanboku.html']

    if __name__ == "__main__":
        with Pool(9) as p:
            result = p.map(fetchclass, urls)
            for l in result:
                sys.stdout.buffer.write(l.encode('utf-8'))
                #print(l)
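
    As a design note, this answer starts and quits a fresh Chrome for every URL, which dominates the runtime. A minimal sketch of an alternative, assuming the same page structure: start one browser per worker process via Pool's initializer and reuse it across URLs (init_worker, the Pool size, and the shortened URL list are my own illustration, not part of the answer):

    from bs4 import BeautifulSoup
    from selenium import webdriver
    from selenium.webdriver.chrome.options import Options
    from multiprocessing import Pool
    import time

    driver = None  # one driver per worker process, set by the initializer

    def init_worker():
        # Runs once in each child process, so every worker owns its own browser.
        global driver
        options = Options()
        options.add_argument('--headless')
        driver = webdriver.Chrome(options=options)

    def fetchclass(url):
        # Reuse this worker's driver instead of launching Chrome per URL.
        driver.get(url)
        time.sleep(5)
        soup = BeautifulSoup(driver.page_source, 'lxml')
        return soup.find(class_='v2_unkouReportInfo').text.strip()

    if __name__ == "__main__":
        urls = ['https://www.tokyometro.jp/unkou/history/ginza.html',
                'https://www.tokyometro.jp/unkou/history/marunouchi.html']  # etc., same list as above
        with Pool(2, initializer=init_worker) as p:
            for text in p.map(fetchclass, urls):
                print(text)
        # Caveat: the workers' drivers are never quit explicitly here; for
        # long-running jobs you would want to close them when the pool ends.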

  • Answer # 2

      

    ConnectionRefusedError: [WinError 10061] The connection could not be made because it was rejected by the target computer.

    It can't connect in the first place, so what can be done on your side?

  • Answer # 3

    If you fetch the status from here, a single request is enough.

    The Japanese data is toward the bottom of the response.

    import requests
    import json

    # url = the operation-status endpoint (not shown in the answer)
    r = requests.get(url)
    r.raise_for_status()

    # Convert JSONP to JSON by stripping the callback wrapper
    data_json = r.text.split("(", 1)[1].strip(")")
    result = json.loads(data_json)

    for line in result["jp"]["lines"]:
        print(f"{line['line_name']}: {line['contents']}")
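
    The split/strip unwrapping above works for a well-formed JSONP response. As a hedged aside, a regex version is a little more defensive about the callback name, whitespace, and a trailing semicolon (jsonp_to_json is a hypothetical helper, not part of the answer):

    import json
    import re

    def jsonp_to_json(text):
        # Match callback( ... ), allowing whitespace and a trailing semicolon.
        m = re.fullmatch(r"\s*[\w.]+\((.*)\)\s*;?\s*", text, re.DOTALL)
        if m is None:
            raise ValueError("not a JSONP payload")
        return json.loads(m.group(1))

    # Usage: result = jsonp_to_json(r.text)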