我在这里尝试做同样的事情,发现 LinkedIn 使用 JavaScript 隐藏页面源代码;由于没有可用的 HTML,WebDriver 无法读取页面内容。我正在使用下面的代码获取页面的内部 HTML(innerHTML),但在单击 Connect 按钮时遇到了问题。您会注意到,实际的 HTML 保存在 data 变量中;在这种情况下,我该如何让单击生效?任何帮助都将不胜感激。这段代码只是一个大型项目的一小部分,该项目在 LinkedIn 上执行许多操作。
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.common.exceptions import WebDriverException
import time
from time import sleep
from bs4 import BeautifulSoup
from tqdm import tqdm
import csv
from urllib.parse import urljoin
import re
import sys
import colorama
import random
# Collect the login credentials and the base delay used to randomize pauses.
# The password is read without echoing and is never printed, so it does not
# end up in the terminal scrollback or in captured logs.
from getpass import getpass

email = input("Your Login Email Please: ")
print(email)
password = getpass("Your Password Please: ")
# Upper bound, in seconds, that later sleeps scale by a random factor.
time_delay = int(input("Please Enter Delay In Secs For Randomization: "))
print(time_delay)
# Configure and launch the Chrome session that the rest of the script drives.
option = webdriver.ChromeOptions()
for chrome_flag in (
    "--normal",
    "--start-maximized",
    "--disable-extensions",  # was added twice; once is enough
    "--auto-open-devtools-for-tabs",
    "--disable-infobars",
):
    option.add_argument(chrome_flag)

# NOTE(review): executable_path= and chrome_options= are the legacy keyword
# arguments; newer Selenium releases expect options= (and a Service object
# for the driver path) - confirm against the installed Selenium version.
driver = webdriver.Chrome(
    executable_path=r"C:\Users\Rohit.METRO-ROHIT\Desktop\Selenium Development\chromedriver.exe",
    chrome_options=option,
)
# Open the LinkedIn landing page and submit the login form, pausing for a
# random fraction of time_delay seconds between the steps.
# Elements are located with find_element(By.ID, ...), which is available in
# both old and current Selenium releases (the find_element_by_* helpers were
# removed in newer versions).
driver.get('https://www.linkedin.com')

email_box = driver.find_element(By.ID, 'login-email')
email_box.send_keys(email)
time.sleep(random.random() * time_delay)

pass_box = driver.find_element(By.ID, 'login-password')
pass_box.send_keys(password)
time.sleep(random.random() * time_delay)

submit_button = driver.find_element(By.ID, 'login-submit')
submit_button.click()
# Visit every profile URL listed in the first column of lets_connect.csv,
# dump the rendered <body> HTML, and try to click the "Connect" button.
with open('lets_connect.csv', newline='') as example_file:
    example_reader = csv.reader(example_file)
    for row in example_reader:
        if not row:
            # csv.reader yields [] for blank lines; row[0] would raise.
            continue

        time.sleep(random.random() * time_delay * 10)
        driver.get(row[0])
        time.sleep(random.random() * time_delay)
        driver.refresh()
        print("refreshing the current page")
        time.sleep(random.random() * time_delay * 2)

        # Read the body's innerHTML both through the element attribute and
        # through JavaScript; `data` is what gets parsed below.
        demo_div = driver.find_element(By.TAG_NAME, 'body')
        print(demo_div.get_attribute('innerHTML'))
        data = driver.execute_script("return arguments[0].innerHTML", demo_div)
        print(data)
        soup = BeautifulSoup(data, "lxml")

        try:
            # NOTE(review): "ember15196" looks like a framework-generated id
            # and may differ between page loads - confirm it is stable.
            connect_button = driver.find_element(
                By.XPATH, '//*[@id="ember15196"]/div[2]/div[2]/button[1]'
            )
            print("test")
            print(connect_button)
            print("test")
            try:
                connect_button.click()
            except WebDriverException:
                print("cant click")
                # Fall back to locating the button by its CSS classes.
                # NOTE(review): the original statement was truncated; this
                # selector is rebuilt from the class tokens that remained
                # visible - confirm against the live page markup.
                driver.find_element(
                    By.CSS_SELECTOR,
                    '.button-primary-large.mh1[class*="profile-actions--connect"]',
                ).click()
        except WebDriverException:
            print("Connect Button Not Found")