Icon

kn_​example_​python_​excel_​format_​extract

extract the formats of the first data row of an Excel file with KNIME and OpenPyxl

extract the formats of the first data row of an Excel file with KNIME and OpenPyxl
https://forum.knime.com/t/could-self-defined-format-in-excel-table-be-added-into-knime/67228/4?u=mlauber71

extract the formats of the first data row of an Excel file with KNIME and OpenPyxlhttps://forum.knime.com/t/could-self-defined-format-in-excel-table-be-added-into-knime/67228/4?u=mlauber71 import knime.scripting.io as knio# https://openpyxl.readthedocs.io/en/stable/tutorial.html#loading-from-a-file# conda install -c anaconda openpyxlfrom openpyxl import load_workbookfrom openpyxl import Workbookfrom openpyxl.styles import numbersimport openpyxlvar_data_path = knio.flow_variables['context.workflow.data-path']# print("var_data_path: ", var_data_path )var_path_excel_file = var_data_path + "my_data.xlsx"# print("var_path_excel_file: ", var_path_excel_file )knio.flow_variables['var_path_excel_file'] = var_path_excel_file# var_sheet_name = knio.flow_variables['var_sheet_name']# Create a workbookworkbook = Workbook()# Create a new sheet and set its namesheet = workbook.active# Headersheaders = ["Text", "Number", "Date", "Currency", "Percentage", "Scientific", "Fraction", "Accounting", "Quantity"]sheet.append(headers)# Sample datadata = ["Sample Text", 1234567890.1234567890,"2023-05-19", 123456.78, 0.1234567890, 1234567890.1234567890, 0.1234567890, 123456.78, 42]# Append datasheet.append(data)# Apply number formatssheet['B2'].number_format = '#,##0.00' # Number with 2 decimal placessheet['C2'].number_format = 'yyyy-mm-dd' # Datesheet['D2'].number_format = '"$"#,##0.00' # Currencysheet['E2'].number_format = '0.00%' # Percentagesheet['F2'].number_format = '0.00E+00' # Scientific notationsheet['G2'].number_format = '# ?/?' # Fractionsheet['H2'].number_format = r'_("$"* #,##0.00_);_("$"* \(#,##0.00\);_("$"* "-"??_);_(@_)' # Accountingsheet['I2'].number_format = '#.##0,0" EA";-#.##0,0" EA"' # Quantity# Save the workbookworkbook.save(filename=var_path_excel_file)# close the workbookworkbook.close() import knime.scripting.io as knio# https://openpyxl.readthedocs.io/en/stable/tutorial.html#loading-from-a-file# conda install -c anaconda openpyxlimport openpyxlimport pandas as pdvar_data_path = knio.flow_variables['context.workflow.data-path']# print("var_data_path: ", var_data_path )var_path_excel_file = var_data_path + "my_data.xlsx"# print("var_path_excel_file: ", var_path_excel_file )knio.flow_variables['var_path_excel_file'] = var_path_excel_filevar_sheet_name = knio.flow_variables['var_sheet_name']# Load the workbook and select the sheetworkbook = openpyxl.load_workbook(var_path_excel_file)# print list of current sheet namesv_list_sheetnames = workbook.sheetnames# print("v_list_sheetnames: ", v_list_sheetnames)# activate the worksheetsheet = workbook[var_sheet_name]# activate the ws 'data'for s in range(len(workbook.sheetnames)): if workbook.sheetnames[s] == sheet: breakworkbook.active = s# Get headersheaders = [cell.value for cell in sheet[1]]# Get formats from the second rowformats = [cell.number_format for cell in sheet[2]]# Create DataFramedf = pd.DataFrame(list(zip(headers, formats)), columns=['Header', 'Format'])# close the workbookworkbook.close()knio.output_tables[0] = knio.Table.from_pandas(df) import knime.scripting.io as knio# https://openpyxl.readthedocs.io/en/stable/tutorial.html#loading-from-a-file# conda install -c anaconda openpyxlimport openpyxlfrom openpyxl.utils import get_column_letterimport pandas as pdimport datetimevar_data_path = knio.flow_variables['context.workflow.data-path']# print("var_data_path: ", var_data_path )var_path_excel_file = var_data_path + "my_data.xlsx"# print("var_path_excel_file: ", var_path_excel_file )knio.flow_variables['var_path_excel_file'] = var_path_excel_filevar_sheet_name = knio.flow_variables['var_sheet_name']# Load the workbook and select the sheetworkbook = openpyxl.load_workbook(var_path_excel_file)# print list of current sheet namesv_list_sheetnames = workbook.sheetnames# print("v_list_sheetnames: ", v_list_sheetnames)# activate the worksheetsheet = workbook[var_sheet_name]# activate the ws 'data'for s in range(len(workbook.sheetnames)): if workbook.sheetnames[s] == sheet: breakworkbook.active = s# -------------------------------------------def format_number(cell):"""Format a number according to an Excel cell's number format""" if cell.is_date: return cell.value.strftime("%Y-%m-%d") elif cell.number_format == 'General': return str(cell.value) elif cell.number_format == '#,##0.00': return "{:,.2f}".format(cell.value) elif cell.number_format == '#,##0': return "{:,}".format(cell.value) elif cell.number_format == '0.00': return "{:.2f}".format(cell.value) elif cell.number_format == '0.00%': return "{:.2%}".format(cell.value) elif cell.number_format == '0.00E+00': return "{:.2e}".format(cell.value) elif cell.number_format == '"$"#,##0.00': return "${:,.2f}".format(cell.value) elif cell.number_format == '"$"#,##0': return "${:,}".format(cell.value) elif cell.number_format == '# ?/?': return str(round(cell.value)) elif cell.number_format == '# ??/??': return str(round(cell.value)) elif cell.number_format == r'_(* #,##0_);_(* \(#,##0\);_(* "-"_);_(@_)': return "${:,}".format(cell.value) elif cell.number_format == r'_("$"* #,##0.00_);_("$"* \(#,##0.00\);_("$"* "-"??_);_(@_)': return "${:,.2f}".format(cell.value) elif cell.number_format == '#.##00" EA";\\-#.##00" EA"': return "{:.2f} EA".format(cell.value) else: return str(cell.value)# Loop through the cells in the sheet and format themdata = []for row in sheet.iter_rows(min_row=2, values_only=False): # Start from the 2nd row formatted_row = [format_number(cell) if cell.is_date or cell.value is not None and cell.data_type == 'n' else str(cell.value) for cell in row] data.append(formatted_row)# Create a pandas DataFramedf = pd.DataFrame(data, columns=[cell.value for cell in sheet[1]])# close the workbookworkbook.close()knio.output_tables[0] = knio.Table.from_pandas(df) locate and create/data/ folderwith absolute pathsvar_sheet_name" Sheet" create sample fileextract the formats of the first data row of an Excel fileresult_headers.tableforce pandas to importall data as stringstry to represent the Excel Formats asPandas Formats and import the stringsresult_pandas_formats.table Collect LocalMetadata StringConfiguration Merge Variables Python Script Python Script Table Writer Python Script Python Script Table Writer Transpose extract the formats of the first data row of an Excel file with KNIME and OpenPyxlhttps://forum.knime.com/t/could-self-defined-format-in-excel-table-be-added-into-knime/67228/4?u=mlauber71 import knime.scripting.io as knio# https://openpyxl.readthedocs.io/en/stable/tutorial.html#loading-from-a-file# conda install -c anaconda openpyxlfrom openpyxl import load_workbookfrom openpyxl import Workbookfrom openpyxl.styles import numbersimport openpyxlvar_data_path = knio.flow_variables['context.workflow.data-path']# print("var_data_path: ", var_data_path )var_path_excel_file = var_data_path + "my_data.xlsx"# print("var_path_excel_file: ", var_path_excel_file )knio.flow_variables['var_path_excel_file'] = var_path_excel_file# var_sheet_name = knio.flow_variables['var_sheet_name']# Create a workbookworkbook = Workbook()# Create a new sheet and set its namesheet = workbook.active# Headersheaders = ["Text", "Number", "Date", "Currency", "Percentage", "Scientific", "Fraction", "Accounting", "Quantity"]sheet.append(headers)# Sample datadata = ["Sample Text", 1234567890.1234567890,"2023-05-19", 123456.78, 0.1234567890, 1234567890.1234567890, 0.1234567890, 123456.78, 42]# Append datasheet.append(data)# Apply number formatssheet['B2'].number_format = '#,##0.00' # Number with 2 decimal placessheet['C2'].number_format = 'yyyy-mm-dd' # Datesheet['D2'].number_format = '"$"#,##0.00' # Currencysheet['E2'].number_format = '0.00%' # Percentagesheet['F2'].number_format = '0.00E+00' # Scientific notationsheet['G2'].number_format = '# ?/?' # Fractionsheet['H2'].number_format = r'_("$"* #,##0.00_);_("$"* \(#,##0.00\);_("$"* "-"??_);_(@_)' # Accountingsheet['I2'].number_format = '#.##0,0" EA";-#.##0,0" EA"' # Quantity# Save the workbookworkbook.save(filename=var_path_excel_file)# close the workbookworkbook.close() import knime.scripting.io as knio# https://openpyxl.readthedocs.io/en/stable/tutorial.html#loading-from-a-file# conda install -c anaconda openpyxlimport openpyxlimport pandas as pdvar_data_path = knio.flow_variables['context.workflow.data-path']# print("var_data_path: ", var_data_path )var_path_excel_file = var_data_path + "my_data.xlsx"# print("var_path_excel_file: ", var_path_excel_file )knio.flow_variables['var_path_excel_file'] = var_path_excel_filevar_sheet_name = knio.flow_variables['var_sheet_name']# Load the workbook and select the sheetworkbook = openpyxl.load_workbook(var_path_excel_file)# print list of current sheet namesv_list_sheetnames = workbook.sheetnames# print("v_list_sheetnames: ", v_list_sheetnames)# activate the worksheetsheet = workbook[var_sheet_name]# activate the ws 'data'for s in range(len(workbook.sheetnames)): if workbook.sheetnames[s] == sheet: breakworkbook.active = s# Get headersheaders = [cell.value for cell in sheet[1]]# Get formats from the second rowformats = [cell.number_format for cell in sheet[2]]# Create DataFramedf = pd.DataFrame(list(zip(headers, formats)), columns=['Header', 'Format'])# close the workbookworkbook.close()knio.output_tables[0] = knio.Table.from_pandas(df) import knime.scripting.io as knio# https://openpyxl.readthedocs.io/en/stable/tutorial.html#loading-from-a-file# conda install -c anaconda openpyxlimport openpyxlfrom openpyxl.utils import get_column_letterimport pandas as pdimport datetimevar_data_path = knio.flow_variables['context.workflow.data-path']# print("var_data_path: ", var_data_path )var_path_excel_file = var_data_path + "my_data.xlsx"# print("var_path_excel_file: ", var_path_excel_file )knio.flow_variables['var_path_excel_file'] = var_path_excel_filevar_sheet_name = knio.flow_variables['var_sheet_name']# Load the workbook and select the sheetworkbook = openpyxl.load_workbook(var_path_excel_file)# print list of current sheet namesv_list_sheetnames = workbook.sheetnames# print("v_list_sheetnames: ", v_list_sheetnames)# activate the worksheetsheet = workbook[var_sheet_name]# activate the ws 'data'for s in range(len(workbook.sheetnames)): if workbook.sheetnames[s] == sheet: breakworkbook.active = s# -------------------------------------------def format_number(cell):"""Format a number according to an Excel cell's number format""" if cell.is_date: return cell.value.strftime("%Y-%m-%d") elif cell.number_format == 'General': return str(cell.value) elif cell.number_format == '#,##0.00': return "{:,.2f}".format(cell.value) elif cell.number_format == '#,##0': return "{:,}".format(cell.value) elif cell.number_format == '0.00': return "{:.2f}".format(cell.value) elif cell.number_format == '0.00%': return "{:.2%}".format(cell.value) elif cell.number_format == '0.00E+00': return "{:.2e}".format(cell.value) elif cell.number_format == '"$"#,##0.00': return "${:,.2f}".format(cell.value) elif cell.number_format == '"$"#,##0': return "${:,}".format(cell.value) elif cell.number_format == '# ?/?': return str(round(cell.value)) elif cell.number_format == '# ??/??': return str(round(cell.value)) elif cell.number_format == r'_(* #,##0_);_(* \(#,##0\);_(* "-"_);_(@_)': return "${:,}".format(cell.value) elif cell.number_format == r'_("$"* #,##0.00_);_("$"* \(#,##0.00\);_("$"* "-"??_);_(@_)': return "${:,.2f}".format(cell.value) elif cell.number_format == '#.##00" EA";\\-#.##00" EA"': return "{:.2f} EA".format(cell.value) else: return str(cell.value)# Loop through the cells in the sheet and format themdata = []for row in sheet.iter_rows(min_row=2, values_only=False): # Start from the 2nd row formatted_row = [format_number(cell) if cell.is_date or cell.value is not None and cell.data_type == 'n' else str(cell.value) for cell in row] data.append(formatted_row)# Create a pandas DataFramedf = pd.DataFrame(data, columns=[cell.value for cell in sheet[1]])# close the workbookworkbook.close()knio.output_tables[0] = knio.Table.from_pandas(df) locate and create/data/ folderwith absolute pathsvar_sheet_name" Sheet" create sample fileextract the formats of the first data row of an Excel fileresult_headers.tableforce pandas to importall data as stringstry to represent the Excel Formats asPandas Formats and import the stringsresult_pandas_formats.tableCollect LocalMetadata StringConfiguration Merge Variables Python Script Python Script Table Writer Python Script Python Script Table Writer Transpose

Nodes

Extensions

Links