I'm trying to scrape google maps reviews of someplace and I'm following this short explanation of how to do it but there is some error always occurs i don't know how to solve it
explanation site : https://medium.com/swlh/scraping-google-maps-using-selenium-3cec08eb6a92
Error message that occurs
C:\Users\onefo\PycharmProjects\project\main.py:29: DeprecationWarning: executable_path has been deprecated, please pass in a Service object self.driver = webdriver.Chrome(self.PATH, options=self.options)
C:\Users\onefo\PycharmProjects\project\main.py:29: DeprecationWarning: executable_path has been deprecated, please pass in a Service object self.driver = webdriver.Chrome(self.PATH, options=self.options)
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains
import time
class WebDriver:
location_data = {}
def __init__(self):
self.PATH = "chromedriver.exe"
self.options = Options()
self.options.add_argument("--headless")
self.driver = webdriver.Chrome(self.PATH, options=self.options)
self.location_data["Reviews"] = []
def click_all_reviews_button(self):
try:
WebDriverWait(self.driver, 20).until(EC.presence_of_element_located((By.CLASS_NAME, "M77dve")))
element = self.driver.find_element_by_class_name("M77dve")
element.click()
except:
self.driver.quit()
return False
return True
def scroll_the_page(self):
try:
WebDriverWait(self.driver, 10).until(EC.presence_of_element_located((By.CLASS_NAME, "section-layout-root"))) # Waits for the page to load.
pause_time = 2 # Waiting time after each scroll.
max_count = 5 # Number of times we will scroll the scroll bar to the bottom.
x = 0
while(x<max_count):
scrollable_div = self.driver.find_element_by_css_selector('div.section-layout.section-scrollbox.scrollable-y.scrollable-show') # It gets the section of the scroll bar.
try:
self.driver.execute_script('arguments[0].scrollTop = arguments[0].scrollHeight', scrollable_div) # Scroll it to the bottom.
except:
pass
time.sleep(pause_time) # wait for more reviews to load.
x=x+1
except:
self.driver.quit()
def expand_all_reviews(self):
try:
element = self.driver.find_elements_by_class_name("ODSEW-KoToPc-ShBeI gXqMYb-hSRGPd")
for i in element:
i.click()
except:
pass
def get_reviews_data(self):
try:
review_names = self.driver.find_elements_by_class_name("ODSEW-ShBeI-title") # Its a list of all the HTML sections with the reviewer name.
review_text = self.driver.find_elements_by_class_name("ODSEW-ShBeI-ShBeI-content") # Its a list of all the HTML sections with the reviewer reviews.
review_dates = self.driver.find_elements_by_css_selector("[class='ODSEW-ShBeI-RgZmSc-date']") # Its a list of all the HTML sections with the reviewer reviewed date.
review_stars = self.driver.find_elements_by_css_selector("[class='ODSEW-ShBeI-H1e3jb']") # Its a list of all the HTML sections with the reviewer rating.
review_stars_final = []
for i in review_stars:
review_stars_final.append(i.get_attribute("aria-label"))
review_names_list = [a.text for a in review_names]
review_text_list = [a.text for a in review_text]
review_dates_list = [a.text for a in review_dates]
review_stars_list = [a for a in review_stars_final]
for (a,b,c,d) in zip(review_names_list, review_text_list, review_dates_list, review_stars_list):
self.location_data["Reviews"].append({"name":a, "review":b, "date":c, "rating":d})
except Exception as e:
pass
def scrape(self, url): # Passed the URL as a variable
try:
self.driver.get(url) # Get is a method that will tell the driver to open at that particular URL
except Exception as e:
self.driver.quit()
return
time.sleep(10) # Waiting for the page to load.
self.click_open_close_time() # Calling the function to click the open and close time button.
self.get_location_data() # Calling the function to get all the location data.
self.get_location_open_close_time() # Calling to get open and close time for each day.
self.get_popular_times() # Gets the busy percentage for each hour of each day.
if (
self.click_all_reviews_button() == False): # Clicking the all reviews button and redirecting the driver to the all reviews page.
return (self.location_data)
time.sleep(5) # Waiting for the all reviews page to load.
self.scroll_the_page() # Scrolling the page to load all reviews.
self.expand_all_reviews() # Expanding the long reviews by clicking see more button in each review.
self.get_reviews_data() # Getting all the reviews data.
self.driver.quit() # Closing the driver instance.
return (self.location_data) # Returning the Scraped Data.
url = "https://www.google.com/maps/place/%D9%83%D9%88%D9%81+%D9%84%D9%84%D9%82%D9%87%D9%88%D8%A9+%D8%A7%D9%84%D9%85%D8%AE%D8%AA%D8%B5%D8%A9%E2%80%AD/@16.8992786,42.6584989,26701m/data=!3m1!1e3!4m9!1m2!2m1!1scoffeeshop!3m5!1s0x160809ac263c4b37:0x1f6fb3095035e8ec!8m2!3d16.9064866!4d42.5439767!15sCgpjb2ZmZWVzaG9wWgwiCmNvZmZlZXNob3CSAQtjb2ZmZWVfc2hvcA"
x = WebDriver()
print(x.scrape(url))