I'm not sure if this will be helpful, but I had the problem where I needed to load a data frame with images, so I wrote the following code. I hope this helps. NOTE: I am not a Python developer, and this is my first time using these libraries, so I am almost certain there are ways to improve this code.
import base64
from io import BytesIO
import openpyxl
import pandas as pd
from openpyxl_image_loader import SheetImageLoader
def load_dataframe(dataframe_file_path: str, dataframe_sheet_name: str) -> pd.DataFrame:
# By default, it appears that pandas does not read images, as it uses only openpyxl to read
# the file. As a result we need to load into memory the dataframe and explicitly load in
# the images, and then convert all of this to HTML and put it back into the normal
# dataframe, ready for use.
pxl_doc = openpyxl.load_workbook(dataframe_file_path)
pxl_sheet = pxl_doc[dataframe_sheet_name]
pxl_image_loader = SheetImageLoader(pxl_sheet)
pd_df = pd.read_excel(dataframe_file_path, sheet_name=dataframe_sheet_name)
for pd_row_idx, pd_row_data in pd_df.iterrows():
for pd_column_idx, _pd_cell_data in enumerate(pd_row_data):
# Offset as openpyxl sheets index by one, and also offset the row index by one more to account for the header row
pxl_cell_coord_str = pxl_sheet.cell(pd_row_idx + 2, pd_column_idx + 1).coordinate
if pxl_image_loader.image_in(pxl_cell_coord_str):
# Now that we have a cell that contains an image, we want to convert it to
# base64, and it make it nice and HTML, so that it loads in a front end
pxl_pil_img = pxl_image_loader.get(pxl_cell_coord_str)
with BytesIO() as pxl_pil_buffered:
pxl_pil_img.save(pxl_pil_buffered, format="PNG")
pxl_pil_img_b64_str = base64.b64encode(pxl_pil_buffered.getvalue())
pd_df.iat[pd_row_idx, pd_column_idx] = '<img src="data:image/png;base64,' + \
pxl_pil_img_b64_str.decode('utf-8') + \
f'" alt="{pxl_cell_coord_str}" />'
return pd_df