Import yaml to excel



我有以下.YAML文件:

Name: Tom
Surname: Smith
Status: Married
Childs:
- ChildName: Willy
Age: 30
Grandchild:
- GrandchildName: John
Age: 4
- GrandchildName: Maria
Age: 3
- ChildName: Arthur
Age: 40
Grandchild: N/A

我所做的:

import openpyxl
import yaml
with open('family.yaml') as file:
family_list = yaml.load(file, Loader=yaml.FullLoader)
def create_workbook():
wb = openpyxl.Workbook('family.xlsx')
wb.save('family.xlsx')
def update_workbook():
wb = openpyxl.load_workbook('family.xlsx')
sh1 = wb.active
for item in family_list.items():
sh1.append(item)
wb.save('family.xlsx')
create_workbook()
update_workbook()

这个代码允许我添加数据(没有"暂停"部分,所以只有姓名、姓氏、状态(到excel,但;名称"姓氏";等等被添加到一列A1中(而不是像我希望的那样在行中(

最终excel工作簿我想获得

我不知道如何附加和格式化(每个停顿符号都应该是新行(提到的数据。我将感谢任何提示!

我决定将yaml加载拆分为三个字典



,以便可以调整它们以适应excel布局。然后使用Pandas将这些导出到Excel的适当行/列位置

代码将处理"YAML_files"中的YAML文件。第一次跑步时设置"save_file",将Header写入第1行,然后将每个YAML文件数据写入放置在电子表格A-G列上一个条目下。任何后续在同一个"save_file"上运行会将新数据置于上次写入的数据之下
每个文件条目也通过交替的背景颜色突出显示
您可以更改所使用的背景色,也可以根据需要禁用。

import glob
import os
import pandas as pd
import yaml
from openpyxl import load_workbook
from yaml.loader import SafeLoader
from openpyxl.utils import get_column_letter
from openpyxl.styles import PatternFill

def write_next_segments(dataf, header, start_row, start_col):
dataframe = ''
if start_col == 0:
dataframe = pd.DataFrame([parent_dictionary])
dataframe.to_excel(writer, sheet_name=save_sheet,
index=False,
header=header,
startrow=start_row,
startcol=start_col)
else:
for ele in dataf:
dataframe = pd.DataFrame([dataf[ele]])
dataframe.to_excel(writer, sheet_name=save_sheet,
index=False,
header=header,
startrow=start_row,
startcol=start_col)
if header:
header = False
start_row += 2
else:
start_row += 1
adjust_colmns(dataframe, start_col)

def adjust_colmns(dataframe, offset):
for column in dataframe:
if column != '':
column_width = max(dataframe[column].astype(str).map(len).max(), len(column)) + 2
col_letter = get_column_letter(dataframe.columns.get_loc(column) + offset + 1)
writer.sheets[save_sheet].column_dimensions[col_letter].width = column_width

def build_dictionary(yf):
# Open the file and load the file
print("Formatting YAML file: " + yf)
yaml_sub_name = ''  # Name of 'child' sub element
with open(yf) as file:
yaml_data = yaml.load(file, Loader=SafeLoader)
for k, v in yaml_data.items():
if type(v) == list:
yaml_sub_name = k  # Name of 'child' sub element
# Make copy of the original dictionary for modification
print("------ Copy the Parent dictionary                 -------------")
parent_dict = yaml_data.copy()
print("------ Create the Child & GrandChild dictionaries -------------")
child_dict = {}
grandchild_dict = {}
sub_list = ''
for x in parent_dict[yaml_sub_name][0]:
if len(child_dict) == 0:
child_dict['Childs0'] = {x: ''}
else:
if type(yaml_data[yaml_sub_name][0][x]) != list:
child_dict['Childs0'].update({x: ''})
else:
sub_list = x
for x in parent_dict[yaml_sub_name][0][sub_list][0]:
if len(grandchild_dict) == 0:
grandchild_dict['Grandchilds0'] = {x: ''}
else:
grandchild_dict['Grandchilds0'].update({x: ''})
# Remove the Child elements from the Parent dictionary
child_list = parent_dict.pop(yaml_sub_name)
# Create the rest of the Child and Grandchild dictionaries
row = 1
for enum, ele in enumerate(child_list):
if row != 1: row += 1
child_dict[yaml_sub_name + str(row)] = ele
pop_num = row
if type(child_list[enum][sub_list]) == list:
for i in range(len(child_list[enum][sub_list])):
if i == 0:
grandchild_dict['Grandchilds' + str(row)] = {'': '', '': ''}
grandchild_dict['Grandchilds' + str(row + 1)] = ele[sub_list][i]
row += 1
child_dict[yaml_sub_name + str(row)] = {'': '', '': ''}
child_dict[yaml_sub_name + str(pop_num)].pop(sub_list)
return parent_dict, child_dict, grandchild_dict

def set_background_color(bg_color, bg_start_row):
coords = writer.sheets[save_sheet].dimensions
start_coord, end_coord = coords.split(':')
bg_color_dims = 'A' + str(bg_start_row) + ':' + end_coord[:1] + str(row_offset)
sheet = writer.sheets[save_sheet]
for cells in sheet[bg_color_dims]:
for cell in cells:
cell.fill = PatternFill("solid", start_color=bg_color)
bg_start_row = row_offset + 1
return bg_start_row

if __name__ == "__main__":
# Change these variables as necessary
yaml_dir = './yaml_files'   # Set name of the input yaml files directory
save_file = 'family.xlsx'   # Set name of Excel workbook for output
save_sheet = 'Sheet1'       # Set name of Excel sheet

# These variables should not be changed 
row_offset = 0              # Row number for next Excel write
header = True               # Write Header to Excel
bg_color_start_row = '2'    # First row to start fill background color
# Set start bg color
bg_color = "DDD9C4"         # First segment background color
# If save file already exists then load it and the save sheet
# Get the last row and last background colour used. Header is set to False
if os.path.isfile(save_file):
book = load_workbook(save_file)
writer = pd.ExcelWriter(save_file, engine='openpyxl')
writer.book = book
writer.sheets = dict((ws.title, ws) for ws in book.worksheets)
row_offset = writer.sheets[save_sheet].max_row + 1
bg_color_start_row = row_offset
bg_color = writer.sheets[save_sheet][writer.sheets[save_sheet].dimensions[3:]].fill.fgColor.value[2:]
header = False
else:
# If no save file then create a new Excel file
writer = pd.ExcelWriter(save_file, engine='openpyxl')
# Get a list of the YAML files in yaml directory
yaml_files = glob.glob(yaml_dir + "/[!~]*.yaml")
# Loop through the YAML files
for enum, y_file in enumerate(yaml_files):
if enum > 0:
header = False
print("------ Export dictionaries to Excel using Pandas --------------")
parent_dictionary, child_dictionary, grandchild_dictionary = build_dictionary(y_file)
parent_col_offset = len(parent_dictionary)
child_col_offset = parent_col_offset + len(child_dictionary['Childs0'])
write_next_segments(parent_dictionary, header, row_offset, 0)
write_next_segments(child_dictionary, header, row_offset, parent_col_offset)
write_next_segments(grandchild_dictionary, header, row_offset, child_col_offset)
row_offset = writer.sheets[save_sheet].max_row + 1
# Insert background fill using alternate fill colours
print("------ Set Background Colors ---------------------------------")
if bg_color == "DAEEF3":
bg_color = "DDD9C4"
else:
bg_color = "DAEEF3"
bg_color_start_row = set_background_color(bg_color, bg_color_start_row)
print("n")
print("------ Save Excel file to '" + save_file + "' ---------------------")
writer.save()

最新更新