I am trying to write a script that helps me to download files from URLs. after downloading all the files I will zip them.
right now I can download files from URLs that have a filename in the URL for instance
https://omextemplates.content.office.net/support/templates/en-us/tf16402488.dotx
my code works like this.
first: I am creating a folder with a unique name.I am getting the folder name from the caller function
def createFolder(folder_name,parent_dir):
directory = folder_name
path = os.path.join(parent_dir , str(directory))
try :
os.mkdir(path)
return path
except OSError as error :
print(error)
return None
Second: I will download all the files into this folder. I am getting the folder path from the caller. the folder path just created by above function
def download_file(url, folder_path, filename_to_be_download=''):
req = requests.get(url, stream = True)
try:
if filename_to_be_download == "":
return None
else:
filename = req.url[downloadUrl.rfind('/')+1:]
file_path = os.path.join(folder_path, filename_to_be_download)
with requests.get(url) as req:
with open(file_path, 'wb') as f:
for chunk in req.iter_content(chunk_size=2024):
if chunk:
f.write(chunk)
return file_path
except Exception as e:
# print(e)
return None
third: I loop over all the download files present inside the folder with unique name in order to create a zip of them
def run():
# for local
parent_dir = "D:/A/scrappers/tmp"
# create Folder with unique name '1234' inside the parent_directory
opportunity_Id = 1234
folder_created_path = createFolder(folder_name=opportunity_Id , parent_dir=parent_dir)
all_Urls = ['https://omextemplates.content.office.net/support/templates/en-us/tf16402488.dotx' ,
'https://procurement-notices.undp.org/view_file.cfm?doc_id=257280']
if folder_created_path :
# we created folder, we store all files in it
all_files_path = []
for eachUrl in all_Urls :
downloadUrl = eachUrl
req = requests.get(downloadUrl)
if req.status_code == 200 :
filename = req.url[downloadUrl.rfind('/') + 1 :]
# adding file path to all_files_path[] list. file just downloaded successfully
downloaded_file_path = download_file(downloadUrl , folder_created_path ,
filename_to_be_download=filename)
if downloaded_file_path :
all_files_path.append(downloaded_file_path)
else :
print("file not downloaded")
else :
print("status code is not 200")
# loop through all files that created and create zip
if len(all_files_path) > 0 :
# writing files to a zipfile
with ZipFile(os.path.join(parent_dir , f"{opportunity_Id}.zip") , 'w' ,
compression=zipfile.ZIP_DEFLATED) as zip :
# writing each file one by one
for file in all_files_path :
zip.write(file)
else :
print("no files to zip them")
else :
print("error while creating folder")
the above script works fine for the first URL inside the all_Urls list. but it does not work for the second URL. I noticed the second URL does not have the file name in it instead the file will download automatically if I put the send use it in the browser URL. how to download files from such a URL and zip them along with my other files
kindly look at enter link description here question as well how