Launching selenium and geckodriver on astro-runtime with a profile

In astro-runtime, we have access to firefox-esr rather than the regular Firefox you run on your desktop. firefox-esr is usually an older release, and Firefox refuses to open a profile that was last used by a newer version, so a profile created with your desktop Firefox won’t load in firefox-esr as-is.

So if you’re using selenium + geckodriver, you might see an error like this when you load in a profile originating from your normal firefox application:

selenium.common.exceptions.WebDriverException: Message: Process unexpectedly closed with status 0

To avoid this, you have to:

  1. Delete compatibility.ini from the profile folder; firefox-esr will generate a new one for itself the next time it starts with that profile
  2. Add the --allow-downgrade argument to your options object: options.add_argument('--allow-downgrade')

Full code I used:

import datetime
import os
import json
import time

from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import StaleElementReferenceException
from selenium.common.exceptions import TimeoutException

from webdriver_manager.firefox import GeckoDriverManager
from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
from selenium.webdriver.firefox.firefox_profile import FirefoxProfile
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.firefox.options import Options

from airflow import DAG

from airflow.operators.dummy_operator import DummyOperator
from airflow.operators.python_operator import PythonOperator

def main():
    """Start headless Firefox with an existing profile and load a test page.

    Builds the Firefox options, loads the profile stored under the Airflow
    ``include`` directory, disables the browser caches on it, starts a
    geckodriver-backed session, opens https://www.google.com/ and prints the
    resulting URL. The browser session is always shut down before returning,
    even when the page load fails.

    Raises:
        selenium.common.exceptions.WebDriverException: If the browser cannot
            be started or the navigation fails.
    """
    options = webdriver.FirefoxOptions()
    # Pass the headless flag as a plain argument instead of relying on the
    # ``options.headless`` setter, which newer Selenium 4 releases deprecated
    # and later dropped (assigning to it there would silently do nothing).
    options.add_argument('-headless')
    options.add_argument('--disable-gpu')
    # Lets Firefox open a profile that was last used by a newer version.
    options.add_argument('--allow-downgrade')
    options.add_argument('--no-sandbox')
    options.binary_location = '/usr/bin/firefox'

    # Set profile
    profile_path = "/usr/local/airflow/include/Firefox/Profiles/ProfileA"
    profile = webdriver.FirefoxProfile(profile_path)
    print('Profile set.')

    # Disable cache
    profile.set_preference("browser.cache.disk.enable", False)
    profile.set_preference("browser.cache.memory.enable", False)
    profile.set_preference("browser.cache.offline.enable", False)
    profile.set_preference("network.http.use-cache", False)

    # Attach the profile through the options object. Passing it as the first
    # positional argument of ``webdriver.Firefox`` only works with the older
    # Selenium 4 signature; in later releases that position is ``options`` and
    # the call fails with a TypeError.
    options.profile = profile

    # Start driver
    service = Service(GeckoDriverManager().install())
    driver = webdriver.Firefox(service=service, options=options)

    try:
        print('Driver initialized')

        driver.get('https://www.google.com/')

        print(f'Current URL: {driver.current_url}')
    finally:
        # Always end the session so Firefox/geckodriver processes are not
        # left running on the worker after the task finishes or fails.
        driver.quit()
    

# Daily DAG (no catch-up) with a single task that runs ``main``.
with DAG(
    'test_selenium_geckodriver',
    description='testing selenium and geckodriver',
    start_date=datetime.datetime(2021, 1, 1),
    schedule_interval='@daily',
    catchup=False,
) as dag:

    # The operator gets its own name so it does not shadow the ``main``
    # function it wraps. ``provide_context`` is deliberately not set: ``main``
    # takes no arguments, so injecting the task context as keyword arguments
    # (Airflow 1.10 behaviour) would make the call fail, and Airflow 2
    # deprecates the flag.
    start_selenium = PythonOperator(
        task_id='start_selenium',
        python_callable=main,
    )