在 Python 中使用代理运行 Selenium WebDriver

我正在尝试用 Python 运行一个 Selenium WebDriver 脚本来完成一些基本任务。当通过 Selenium IDE 界面运行机器人时,我可以让它完美地工作(即:只是让 GUI 重复我的操作时)。然而,当我将代码导出为 Python 脚本并尝试从命令行执行它时,Firefox 浏览器会打开,但始终无法访问起始 URL(命令行返回一个错误,程序随即停止)。无论我尝试访问哪个网站,都会出现这种情况。

出于演示的目的,我在这里包含了一个非常基本的代码。我认为我没有正确地包含代码的代理部分,因为返回的错误似乎是由代理生成的。

任何帮助都将不胜感激。

下面的代码只是用来打开 www.google.ie 并搜索单词“selenium”。对我来说,它会打开一个空白的 Firefox 浏览器,然后就停止了。

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select
from selenium.common.exceptions import NoSuchElementException
import unittest, time, re
from selenium.webdriver.common.proxy import *


class Testrobot2(unittest.TestCase):
    """Selenium IDE export that opens Google through a manual proxy.

    The proxy configuration in setUp is kept exactly as exported; only the
    syntax (indentation, exception clauses, missing import) is repaired.
    """

    def setUp(self):
        """Start Firefox with a manual proxy and reset the per-test state."""
        myProxy = "http://149.215.113.110:70"

        # The same endpoint is configured for HTTP, FTP and SSL traffic.
        proxy = Proxy({
            'proxyType': ProxyType.MANUAL,
            'httpProxy': myProxy,
            'ftpProxy': myProxy,
            'sslProxy': myProxy,
            'noProxy': ''})

        self.driver = webdriver.Firefox(proxy=proxy)
        self.driver.implicitly_wait(30)
        self.base_url = "https://www.google.ie/"
        self.verificationErrors = []
        self.accept_next_alert = True

    def test_robot2(self):
        # Load the search page, then clear the search box and type the query.
        driver = self.driver
        driver.get(self.base_url + "/#gs_rn=17&gs_ri=psy-ab&suggest=p&cp=6&gs_id=ix&xhr=t&q=selenium&es_nrs=true&pf=p&output=search&sclient=psy-ab&oq=seleni&gs_l=&pbx=1&bav=on.2,or.r_qf.&bvm=bv.47883778,d.ZGU&fp=7c0d9024de9ac6ab&biw=592&bih=665")
        driver.find_element_by_id("gbqfq").clear()
        driver.find_element_by_id("gbqfq").send_keys("selenium")

    def is_element_present(self, how, what):
        """Return True if find_element(by=how, value=what) succeeds, else False."""
        # "except X:" (no binding) is valid on both Python 2 and Python 3;
        # the old "except X, e" form is a syntax error on Python 3.
        try:
            self.driver.find_element(by=how, value=what)
        except NoSuchElementException:
            return False
        return True

    def is_alert_present(self):
        """Return True if switching to an alert succeeds, else False."""
        # Imported here because the surrounding imports only bring in
        # NoSuchElementException.
        from selenium.common.exceptions import NoAlertPresentException

        try:
            self.driver.switch_to_alert()
        except NoAlertPresentException:
            return False
        return True

    def close_alert_and_get_its_text(self):
        """Accept or dismiss the current alert and return its text.

        The alert is accepted when self.accept_next_alert is true and
        dismissed otherwise; the flag is always reset to True afterwards.
        """
        try:
            alert = self.driver.switch_to_alert()
            alert_text = alert.text
            if self.accept_next_alert:
                alert.accept()
            else:
                alert.dismiss()
            return alert_text
        finally:
            self.accept_next_alert = True

    def tearDown(self):
        """Close the browser and fail if any verification errors were recorded."""
        self.driver.quit()
        self.assertEqual([], self.verificationErrors)


if __name__ == "__main__":
    unittest.main()
225183 次浏览

How about something like this

# Proxy endpoint written as host:port, without a URL scheme.
PROXY = "149.215.113.110:70"


# Store manual proxy settings on the shared FIREFOX capabilities dict; the
# same endpoint is used for HTTP, FTP and SSL.
webdriver.DesiredCapabilities.FIREFOX['proxy'] = {
"httpProxy":PROXY,
"ftpProxy":PROXY,
"sslProxy":PROXY,
"noProxy":None,
"proxyType":"MANUAL",
"class":"org.openqa.selenium.Proxy",
"autodetect":False
}


# You have to use the Remote driver here; otherwise you will have to code the
# proxy handling yourself in Python.
driver = webdriver.Remote("http://localhost:4444/wd/hub", webdriver.DesiredCapabilities.FIREFOX)

You can read more about it here.

My solution:

def my_proxy(PROXY_HOST,PROXY_PORT):
    """Return a Firefox driver whose profile uses a manual HTTP proxy.

    PROXY_HOST: value stored in the ``network.proxy.http`` preference.
    PROXY_PORT: proxy port; it is converted with ``int()`` before being
        stored, so either an int or a numeric string is accepted.
    """
    fp = webdriver.FirefoxProfile()
    # Debug output. The parenthesised single-argument form prints the same
    # thing on Python 2 and Python 3 (the bare "print X" statement is a
    # syntax error on Python 3).
    print(PROXY_PORT)
    print(PROXY_HOST)
    # Direct = 0, Manual = 1, PAC = 2, AUTODETECT = 4, SYSTEM = 5
    fp.set_preference("network.proxy.type", 1)
    fp.set_preference("network.proxy.http",PROXY_HOST)
    fp.set_preference("network.proxy.http_port",int(PROXY_PORT))
    fp.set_preference("general.useragent.override","whater_useragent")
    fp.update_preferences()
    return webdriver.Firefox(firefox_profile=fp)

Then call in your code:

my_proxy(PROXY_HOST,PROXY_PORT)

I had issues with this code because I was passing a string as a port #:

 PROXY_PORT="31280"

This is important:

int("31280")

You must pass an integer instead of a string, or your Firefox profile will not be set to the proper port and the connection through the proxy will not work.

Try setting up a SOCKS5 proxy too. I was facing the same problem, and it was solved by using the SOCKS proxy.

def install_proxy(PROXY_HOST,PROXY_PORT):
    """Return a Firefox driver with one proxy set for every protocol.

    The same host and port are written to the HTTP, HTTPS, SSL, FTP and
    SOCKS proxy preferences, and the user agent is overridden.

    PROXY_HOST: proxy host stored in each ``network.proxy.*`` preference.
    PROXY_PORT: proxy port; converted with ``int()`` before being stored,
        so either an int or a numeric string is accepted.
    """
    fp = webdriver.FirefoxProfile()
    # Debug output. The parenthesised form works on Python 2 and Python 3
    # (the bare "print X" statement is a syntax error on Python 3).
    print(PROXY_PORT)
    print(PROXY_HOST)
    # 1 selects manual proxy configuration.
    fp.set_preference("network.proxy.type", 1)
    fp.set_preference("network.proxy.http",PROXY_HOST)
    fp.set_preference("network.proxy.http_port",int(PROXY_PORT))
    # NOTE(review): "network.proxy.https*" may not be a preference Firefox
    # reads; the "network.proxy.ssl*" pair below is set as well -- confirm.
    fp.set_preference("network.proxy.https",PROXY_HOST)
    fp.set_preference("network.proxy.https_port",int(PROXY_PORT))
    fp.set_preference("network.proxy.ssl",PROXY_HOST)
    fp.set_preference("network.proxy.ssl_port",int(PROXY_PORT))
    fp.set_preference("network.proxy.ftp",PROXY_HOST)
    fp.set_preference("network.proxy.ftp_port",int(PROXY_PORT))
    fp.set_preference("network.proxy.socks",PROXY_HOST)
    fp.set_preference("network.proxy.socks_port",int(PROXY_PORT))
    fp.set_preference("general.useragent.override","Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.75.14 (KHTML, like Gecko) Version/7.0.3 Safari/7046A194A")
    fp.update_preferences()
    return webdriver.Firefox(firefox_profile=fp)

Then call install_proxy ( ip , port ) from your program.

If anyone is looking for a solution here's how :

from selenium import webdriver
# Placeholder: replace with the proxy address to use.
PROXY = "YOUR_PROXY_ADDRESS_HERE"
# Store manual proxy settings on the shared FIREFOX capabilities dict; the
# same address is used for HTTP, FTP and SSL.
webdriver.DesiredCapabilities.FIREFOX['proxy']={
"httpProxy":PROXY,
"ftpProxy":PROXY,
"sslProxy":PROXY,
"noProxy":None,
"proxyType":"MANUAL",
"autodetect":False
}
# Start Firefox and load an IP-checking page to see which address is reported.
driver = webdriver.Firefox()
driver.get('http://www.whatsmyip.org/')

Try by Setting up FirefoxProfile

from selenium import webdriver
import time




"Define Both ProxyHost and ProxyPort as String"
ProxyHost = "54.84.95.51"
ProxyPort = "8083"






def ChangeProxy(ProxyHost ,ProxyPort):
    """Return a Firefox driver whose profile uses the given manual HTTP proxy.

    ProxyHost: value stored in ``network.proxy.http``.
    ProxyPort: proxy port; converted with ``int()`` before being stored.
    """
    proxied_profile = webdriver.FirefoxProfile()
    # Preferences are applied in the listed order; type 1 is manual proxy.
    preferences = (
        ("network.proxy.type", 1),
        ("network.proxy.http", ProxyHost),
        ("network.proxy.http_port", int(ProxyPort)),
    )
    for pref_name, pref_value in preferences:
        proxied_profile.set_preference(pref_name, pref_value)
    proxied_profile.update_preferences()
    return webdriver.Firefox(firefox_profile=proxied_profile)




def FixProxy():
    """Return a Firefox driver whose profile has the proxy type reset to 0.

    A fresh profile is created and ``network.proxy.type`` is set to 0
    (direct connection, i.e. no proxy).
    """
    profile = webdriver.FirefoxProfile()
    profile.set_preference("network.proxy.type", 0)
    return webdriver.Firefox(firefox_profile=profile)




# Browse through the proxy first.
driver = ChangeProxy(ProxyHost ,ProxyPort)
driver.get("http://whatismyipaddress.com")


# Leave the page up briefly so the reported address can be read.
time.sleep(5)


# Close the proxied browser before rebinding `driver`; otherwise the first
# Firefox instance is left running with no handle to quit it.
driver.quit()


# Open a second browser with the proxy setting reset.
driver = FixProxy()
driver.get("http://whatismyipaddress.com")

This program was tested on both Windows 8 and Mac OS X. If you are using Mac OS X and your Selenium is out of date, you may get a selenium.common.exceptions.WebDriverException. If so, try again after upgrading Selenium:

pip install -U selenium

Works for me this way (similar to @Amey and @user4642224 code, but shorter a bit):

from selenium import webdriver
from selenium.webdriver.common.proxy import Proxy, ProxyType


# Describe a manual proxy; replace the "ip_addr:port" placeholders.
prox = Proxy()
prox.proxy_type = ProxyType.MANUAL
prox.http_proxy = "ip_addr:port"
prox.socks_proxy = "ip_addr:port"
prox.ssl_proxy = "ip_addr:port"


# Work on a copy: DesiredCapabilities.CHROME is a shared class-level dict,
# and add_to_capabilities() writes into whatever dict it is given.
capabilities = webdriver.DesiredCapabilities.CHROME.copy()
prox.add_to_capabilities(capabilities)


driver = webdriver.Chrome(desired_capabilities=capabilities)

try running tor service, add the following function to your code.

def connect_tor(port):
    """Point the socks module's default proxy at 127.0.0.1:port and patch socket.

    port: port number passed to ``socks.set_default_proxy``.

    NOTE(review): this patches ``socket.socket`` in the current Python
    process only; whether a separately launched browser is affected should
    be confirmed.
    """
    local_host = '127.0.0.1'
    socks.set_default_proxy(
        socks.PROXY_TYPE_SOCKS5,
        local_host,
        port,
        True,
    )
    # From here on, socket.socket refers to the socks-aware socket class.
    socket.socket = socks.socksocket


def main():
    """Configure the Tor SOCKS proxy, then start Firefox."""
    # connect_tor() requires the SOCKS port. 9050 is the usual default for
    # the Tor daemon (Tor Browser typically listens on 9150) -- adjust to
    # match the local Tor configuration.
    connect_tor(9050)
    driver = webdriver.Firefox()

The result stated above may be correct, but isn't working with the latest webdriver. Here is my solution for the above question. Simple and sweet


# Placeholders: replace with the proxy endpoints for HTTP and HTTPS traffic.
http_proxy  = "ip_addr:port"
https_proxy = "ip_addr:port"


# Store manual proxy settings on the shared FIREFOX capabilities dict.
webdriver.DesiredCapabilities.FIREFOX['proxy']={
"httpProxy":http_proxy,
"sslProxy":https_proxy,
"proxyType":"MANUAL"
}


# Start Firefox after the capabilities dict has been updated.
driver = webdriver.Firefox()

OR

    http_proxy  = "http://ip:port"
https_proxy = "https://ip:port"


proxyDict = {
"http"  : http_proxy,
"https" : https_proxy,
}


driver = webdriver.Firefox(proxy=proxyDict)

Proxy with verification. This is a whole new python script in reference from a Mykhail Martsyniuk sample script.

# Load webdriver
from selenium import webdriver


# Load proxy option
from selenium.webdriver.common.proxy import Proxy, ProxyType


# Configure Proxy Option
prox = Proxy()
prox.proxy_type = ProxyType.MANUAL


# Proxy IP & Port (placeholders). The quotes must be plain ASCII quotes;
# typographic quotes are a syntax error in Python.
prox.http_proxy = "0.0.0.0:00000"
prox.socks_proxy = "0.0.0.0:00000"
prox.ssl_proxy = "0.0.0.0:00000"


# Configure capabilities on a copy: DesiredCapabilities.CHROME is a shared
# class-level dict, and add_to_capabilities() writes into the dict it is given.
capabilities = webdriver.DesiredCapabilities.CHROME.copy()
prox.add_to_capabilities(capabilities)


# Start Chrome with the proxied capabilities.
# NOTE(review): the path contains a space ("share chromedriver"); it may have
# been meant as "/usr/local/share/chromedriver" -- confirm.
driver = webdriver.Chrome(executable_path='/usr/local/share chromedriver',desired_capabilities=capabilities)


# Verify proxy ip
driver.get("http://www.whatsmyip.org/")

As stated by @Dugini, some config entries have been removed. Maximal:

webdriver.DesiredCapabilities.FIREFOX['proxy'] = {
"httpProxy":PROXY,
"ftpProxy":PROXY,
"sslProxy":PROXY,
"noProxy":[],
"proxyType":"MANUAL"
}

The answers above and on this question either didn't work for me with Selenium 3.14 and Firefox 68.9 on Linux, or are unnecessarily complex. I needed to use a WPAD configuration, sometimes behind a proxy (on a VPN), and sometimes not. After studying the code a bit, I came up with:

from selenium import webdriver
from selenium.webdriver.common.proxy import Proxy
from selenium.webdriver.firefox.firefox_profile import FirefoxProfile


# Build a Proxy from a proxy auto-config URL, attach it to a fresh Firefox
# profile, and start Firefox with that profile.
proxy = Proxy({'proxyAutoconfigUrl': 'http://wpad/wpad.dat'})
profile = FirefoxProfile()
profile.set_proxy(proxy)
driver = webdriver.Firefox(firefox_profile=profile)

The Proxy initialization sets proxyType to ProxyType.PAC (autoconfiguration from a URL) as a side-effect.

It also worked with Firefox's autodetect, using:

from selenium.webdriver.common.proxy import ProxyType


proxy = Proxy({'proxyType': ProxyType.AUTODETECT})

But I don't think this would work with both internal URLs (not proxied) and external (proxied) the way WPAD does. Similar proxy settings should work for manual configuration as well. The possible proxy settings can be seen in the code here.

Note that directly passing the Proxy object as proxy=proxy to the driver does NOT work--it's accepted but ignored (there should be a deprecation warning, but in my case I think Behave is swallowing it).

This worked for me and allow to use an headless browser, you just need to call the method passing your proxy.

def setProxy(proxy):
    """Return a headless Chrome driver that uses `proxy` for HTTP and SSL.

    proxy: value assigned to both ``http_proxy`` and ``ssl_proxy`` of a
        manual Proxy (presumably a "host:port" string -- confirm with caller).
    """
    options = Options()
    options.headless = True
    #options.add_argument("--window-size=1920,1200")
    options.add_argument("--disable-dev-shm-usage")
    options.add_argument("--no-sandbox")
    prox = Proxy()
    prox.proxy_type = ProxyType.MANUAL
    prox.http_proxy = proxy
    prox.ssl_proxy = proxy
    # Copy first: DesiredCapabilities.CHROME is a shared class-level dict, so
    # writing into it directly would carry one call's proxy into later calls.
    capabilities = webdriver.DesiredCapabilities.CHROME.copy()
    prox.add_to_capabilities(capabilities)
    return webdriver.Chrome(desired_capabilities=capabilities, options=options, executable_path=DRIVER_PATH)

Scraping data from an online source is quite easy when scraping APIs are used. You can try using a scraper API to scrape the information from web pages; it parses the web data automatically, and the API can be integrated into your source code as well. Besides using an API, you can try the source code below, or use Beautiful Soup to scrape data with CSS selectors. Before trying this, please note that the select() method can be used to find multiple elements, while select_one() is used to find a single element.

Source Code:

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
PROXY = "177.46.141.143:59393" #your proxy  (ip address: port no)
# Use the Options class imported above; WebDriverWait is not imported here
# and is not where Chrome options come from.
chrome_options = Options()
chrome_options.add_argument('--proxy-server=%s' % PROXY)
chrome = webdriver.Chrome(chrome_options=chrome_options)
# Load an IP-checking page to see which address is reported.
chrome.get("https://www.ipchicken.com/")

It's quite an old post; however, an up-to-date answer may still benefit others, and the original author was extremely close to a working solution.

First of all, the ftpProxy setting is no longer supported at this time and will throw an error

proxy = Proxy({
'proxyType': ProxyType.MANUAL,
'httpProxy': myProxy,
'ftpProxy': myProxy, # this will throw an error
'sslProxy': myProxy,
'noProxy':''})

Next, instead of setting the proxy property, you should be using firefox options like so

proxy = Proxy({
'proxyType': ProxyType.MANUAL,
'httpProxy': myProxy,
'sslProxy': myProxy,
'noProxy': ''})


options = Options()
options.proxy = proxy
driver = webdriver.Firefox(options=options)

Additionally, don't define the scheme when specifying the proxy, especially if you want to use the same proxy for multiple protocols

myProxy = "149.215.113.110:70"

All together it looks like this

from selenium import webdriver
# Import only the two names used below instead of a star import.
from selenium.webdriver.common.proxy import Proxy, ProxyType
from selenium.webdriver.firefox.options import Options


# Proxy endpoint as host:port, without a URL scheme.
myProxy = "149.215.113.110:70"
proxy = Proxy({
    'proxyType': ProxyType.MANUAL,
    'httpProxy': myProxy,
    'sslProxy': myProxy,
    'noProxy': ''})


# Attach the proxy to the Firefox options and start the browser with them.
options = Options()
options.proxy = proxy
driver = webdriver.Firefox(options=options)
driver.get("https://www.google.ie")

This helps me in September 2022 - proxy for selenium with Auth user+password

import os
import zipfile


from selenium import webdriver


PROXY_HOST = '192.168.3.2'  # rotating proxy or host
PROXY_PORT = 8080 # port
PROXY_USER = 'proxy-user' # username
PROXY_PASS = 'proxy-password' # password


manifest_json = """
{
"version": "1.0.0",
"manifest_version": 2,
"name": "Chrome Proxy",
"permissions": [
"proxy",
"tabs",
"unlimitedStorage",
"storage",
"<all_urls>",
"webRequest",
"webRequestBlocking"
],
"background": {
"scripts": ["background.js"]
},
"minimum_chrome_version":"22.0.0"
}
"""


background_js = """
var config = {
mode: "fixed_servers",
rules: {
singleProxy: {
scheme: "http",
host: "%s",
port: parseInt(%s)
},
bypassList: ["localhost"]
}
};
chrome.proxy.settings.set({value: config, scope: "regular"}, function() {});
function callbackFn(details) {
return {
authCredentials: {
username: "%s",
password: "%s"
}
};
}
chrome.webRequest.onAuthRequired.addListener(
callbackFn,
{urls: ["<all_urls>"]},
['blocking']
);
""" % (PROXY_HOST, PROXY_PORT, PROXY_USER, PROXY_PASS)


def get_chromedriver(use_proxy=False, user_agent=None):
    """Return a Chrome driver, optionally with the proxy-auth extension.

    use_proxy: when true, write ``manifest_json`` and ``background_js`` into
        ``proxy_auth_plugin.zip`` (relative to the current working directory)
        and add that archive to the Chrome options as an extension.
    user_agent: when truthy, passed to Chrome as ``--user-agent=<value>``.

    The chromedriver binary is looked up next to this file.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    opts = webdriver.ChromeOptions()

    if use_proxy:
        plugin_path = 'proxy_auth_plugin.zip'
        members = (
            ("manifest.json", manifest_json),
            ("background.js", background_js),
        )
        with zipfile.ZipFile(plugin_path, 'w') as archive:
            for member_name, payload in members:
                archive.writestr(member_name, payload)
        opts.add_extension(plugin_path)

    if user_agent:
        opts.add_argument('--user-agent=%s' % user_agent)

    driver_binary = os.path.join(here, 'chromedriver')
    return webdriver.Chrome(driver_binary, chrome_options=opts)


def main():
    """Start Chrome with the proxy extension and load an IP-echo page."""
    driver = get_chromedriver(use_proxy=True)
    # The URL literal was missing its closing quote, which is a syntax error.
    driver.get('https://ifconfig.me/')


if __name__ == '__main__':
    main()

source link

Surprised to see no examples of using authenticated proxies.

October 2022 solution for authenticated proxies (Firefox & Chrome):

from base64 import b64encode

from selenium import webdriver


# Placeholders: replace with the real proxy endpoint and credentials.
PROXY_HOST = "0.0.0.0"
PROXY_PORT = "0000"
PROXY_USERNAME = "user"
PROXY_PASS = "pass"


# If you're using Firefox
profile = webdriver.FirefoxProfile()
profile.set_preference("network.proxy.type", 1)
profile.set_preference("network.proxy.http",PROXY_HOST)
# The port preference must be an integer, not a string.
profile.set_preference("network.proxy.http_port", int(PROXY_PORT))
profile.set_preference('network.proxy.no_proxies_on', 'localhost, 127.0.0.1')
# Base64-encode "user:pass" and store it in the auth-token preference.
# NOTE(review): 'extensions.closeproxyauth.authtoken' looks like a preference
# read by a proxy-auth add-on; confirm that add-on is installed in the profile.
credentials = '%s:%s' % (PROXY_USERNAME, PROXY_PASS)
credentials = b64encode(credentials.encode('ascii')).decode('utf-8')
profile.set_preference('extensions.closeproxyauth.authtoken', credentials)
driver = webdriver.Firefox(firefox_profile=profile)


# If you're using Chrome
chrome_options = webdriver.ChromeOptions()
# URL layout is user:password@host:port, so the credentials come first.
# NOTE(review): confirm that Chrome honours credentials embedded in
# --proxy-server; an extension-based approach may be needed instead.
chrome_options.add_argument('--proxy-server=http://%s:%s@%s:%s' % (PROXY_USERNAME, PROXY_PASS, PROXY_HOST, PROXY_PORT))
driver = webdriver.Chrome(executable_path='chromedriver.exe', chrome_options=chrome_options)


# Proxied request
driver.get("https://www.google.com")

Some useful proxy network resources with quick Selenium Integrations

  1. Bright Data
  2. SOAX
  3. NetNut