read excel file using python panda lib
import pandas as pd
import os
def read_excel_files(directory):
# Initialize an empty list to store data frames
dfs = []
# Loop through all files in the directory
for filename in os.listdir(directory):
if filename.endswith(".xlsx"): # Ensure the file is an Excel file
filepath = os.path.join(directory, filename)
# Read all sheets from the Excel file into a dictionary of data frames
xls = pd.ExcelFile(filepath)
sheet_dfs = {}
for sheet_name in xls.sheet_names:
sheet_dfs[sheet_name] = xls.parse(sheet_name)
# Append the dictionary to the list of data frames
dfs.append(sheet_dfs)
return dfs
def process_dataframes(dfs):
# Initialize an empty list to store processed data frames
processed_dfs = []
# Loop through each dictionary of data frames
for dfs_dict in dfs:
# Loop through each sheet and data frame in the dictionary
for sheet_name, df in dfs_dict.items():
# Assuming the first row contains column names
column_names = df.iloc[0].tolist()
# Create a new data frame excluding the first row as it contains column names
new_df = pd.DataFrame(df.values[1:], columns=column_names)
# Append the processed data frame to the list
processed_dfs.append((sheet_name, new_df))
return processed_dfs
if __name__ == "__main__":
directory = "path/to/your/excel/files"
# Read Excel files
dfs = read_excel_files(directory)
# Process data frames
processed_dfs = process_dataframes(dfs)
# Print or further process the processed data frames
for sheet_name, df in processed_dfs:
print("Sheet Name:", sheet_name)
print(df)
print()