0

Trying to scrape some data. Checking it with print and getting multiple prints..However, the CSV has only one entry. Can you help please? Thanks a lot.

import csv
import time
import requests
from bs4 import BeautifulSoup
from selenium.webdriver.chrome.options import Options
from selenium import webdriver


job_Details = []
job_links = []



chrome_options = Options()
'''chrome_options.add_argument("--headless")'''
driver = webdriver.Chrome(executable_path='C:/bin/chromedriver.exe', options=chrome_options)
driver.get(f'https://remotejobs.world/')
'''SCROLL_PAUSE_TIME = 20'''
last_height = driver.execute_script("return document.body.scrollHeight")
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
divs = driver.find_elements_by_tag_name('h2')
for div in divs:
    job_Details.append(div)
    link = div.find_element_by_tag_name('a')
    job_links.append(link)

for job_detail, job_link in zip(job_Details, job_links):
    if job_detail and job_link:
        print(job_link.get_attribute('href'))
        print(job_detail.text)
        url = job_link.get_attribute('href')
        new_page = requests.get(url).text
        time.sleep(2)
        soup = BeautifulSoup(new_page, 'html.parser')
        job_desc = soup.find('div', class_='w-full md:w-2/3')
        if job_desc:
            print(job_desc.text) #Successful Prints.
            dict = {'Job_title and Company': job_detail.text, "Job link": job_link.get_attribute('href'),
                        "Job Details": job_desc.text}
            with open('remoteWORLD.csv', 'w') as f:
                w = csv.DictWriter(f, dict.keys())
                w.writeheader()
                w.writerow(dict)
Abhishek Rai
  • 1,912
  • 3
  • 11
  • 27

2 Answers2

1

See below how simple my suggestion was.

with open('remoteWORLD.csv', 'w') as f:
    w = csv.DictWriter(f, ['Job_title and Company', "Job link", "Job Details"])
    w.writeheader()
    for job_detail, job_link in zip(job_Details, job_links):
        if job_detail and job_link:
            print(job_link.get_attribute('href'))
            print(job_detail.text)
            url = job_link.get_attribute('href')
            new_page = requests.get(url).text
            time.sleep(2)
            soup = BeautifulSoup(new_page, 'html.parser')
            job_desc = soup.find('div', class_='w-full md:w-2/3')
            if job_desc:
                print(job_desc.text) #Successful Prints.
                dict = {'Job_title and Company': job_detail.text, "Job link": job_link.get_attribute('href'),
                            "Job Details": job_desc.text}
                w.writerow(dict)

Justin Ezequiel
  • 5,254
  • 2
  • 11
  • 14
0

Try using:

with open('remoteWORLD.csv', 'a+') as f:

The w will rewrite the file each time. The a means you can append (the + means it can be read as well as written).

Check here for more explanations: Difference between modes a, a+, w, w+, and r+ in built-in open function?

EDIT: or as Justin says, move it outside of the loop and write your lists job_Details and job_links to it

ob9528
  • 11
  • 3