Icon

kn_example_python_loop_graphic_restart

Python - loop through data rows and create PNG files within a loop - store which items have already been processed

Python - loop through data rows and create PNG files within a loop - store which items have already been processed

# Python - loop through data rows and create PNG files within a loop - store
# which items have already been processed
# https://forum.knime.com/t/python-scripts-in-loops/62798/2?u=mlauber71
#
# Workflow annotations:
#   - locate and create /data/ folder with absolute paths
#   - check output.parquet: which lines have already been processed
#   - create the basic data file for the loop
#     (you can obviously use your own data)
#   - loop within Python
#   - delete the output.parquet file to start the process again
#
# Workflow nodes: Collect Local Metadata, Parquet Reader, Python Script,
# Python Script, Delete Files/Folders
#
# The workflow lists two "Python Script" nodes; the two scripts below are
# presumably one per node and are meant to be pasted into separate nodes,
# which is why each one carries its own imports.


# ---------------------------------------------------------------------------
# Python Script: loop within Python
#
# Reads the input table, draws one PNG per row and records every finished
# row in output.parquet so that a restarted run skips rows already done.
# ---------------------------------------------------------------------------
import os

import knime.scripting.io as knio
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

var_data_path = knio.flow_variables['context.workflow.data-path']

# os.path.join yields the same path as plain concatenation when the flow
# variable ends with a separator, and a correct one when it does not.
processed_path = os.path.join(var_data_path, "output.parquet")

# Load your data into a Pandas DataFrame.
# Alternative source: pd.read_csv(os.path.join(var_data_path, "your_data.csv"))
df = knio.input_tables[0].to_pandas()

# Load the parquet file containing previously processed rows; a missing file
# simply means nothing has been processed yet.
try:
    processed_df = pd.read_parquet(processed_path)
except FileNotFoundError:
    processed_df = pd.DataFrame(columns=["row_id", "name_column", "timestamp"])

# Build the lookup once instead of scanning the whole column for every row.
processed_ids = set(processed_df["row_id"].tolist())

# Loop through each row in the DataFrame; the DataFrame index is the row ID.
for index, row in df.iterrows():
    # Skip rows that an earlier (possibly interrupted) run already handled.
    if index in processed_ids:
        continue

    # Set the file name based on a column in the DataFrame.
    file_name = f"{row['name_column']}.png"

    # Create the plot on its own figure and always close it, so a failed
    # save can neither leak the figure nor bleed into the next row's plot.
    fig, ax = plt.subplots()
    try:
        ax.plot(row["x_values"], row["y_values"])
        fig.savefig(os.path.join(var_data_path, file_name))
    finally:
        plt.close(fig)

    # Record the row only after its PNG has been written successfully.
    new_row = pd.DataFrame({
        "row_id": [index],
        "name_column": [row["name_column"]],
        "timestamp": [pd.Timestamp.now()],
    })
    if processed_df.empty:
        # Concatenating with an empty frame is deprecated in newer pandas
        # releases; starting from the new row keeps its column dtypes.
        processed_df = new_row
    else:
        processed_df = pd.concat([processed_df, new_row], ignore_index=True)
    processed_ids.add(index)

    # Persist after every row (not once at the end) so that an interruption
    # loses at most the row currently being processed.
    processed_df.to_parquet(processed_path, index=False)

knio.output_tables[0] = knio.Table.from_pandas(df)


# ---------------------------------------------------------------------------
# Python Script: create the basic data file for the loop
#
# Builds a small example table (row ID, plot name, x values, y values),
# writes it to your_data.csv and passes it on as the node's output table.
# ---------------------------------------------------------------------------
import ast
import os

import knime.scripting.io as knio
import numpy as np
import pandas as pd

var_data_path = knio.flow_variables['context.workflow.data-path']

# Create a 2D NumPy array with the example data and row IDs.
# NumPy stores every cell of this mixed array as a string, which is why the
# columns are converted back to lists / numbers below.
data = np.array([
    [1, "plot1", "[1, 2, 3]", "[4, 5, 6]"],
    [2, "plot2", "[2, 4, 6]", "[8, 10, 12]"],
    [3, "plot3", "[3, 6, 9]", "[12, 15, 18]"],
])

# Create a new DataFrame from the 2D NumPy array.
df = pd.DataFrame(data, columns=["row_id", "name_column", "x_values", "y_values"])

# Convert the string cells to lists. ast.literal_eval only accepts Python
# literals, so unlike eval() it cannot execute arbitrary code.
df["x_values"] = df["x_values"].apply(ast.literal_eval)
df["y_values"] = df["y_values"].apply(ast.literal_eval)

# Convert the data types of the DataFrame columns.
df["row_id"] = pd.to_numeric(df["row_id"])
# df[["x_values", "y_values"]] = df[["x_values", "y_values"]].apply(pd.to_numeric)

# Save the DataFrame to a CSV file.
df.to_csv(os.path.join(var_data_path, "your_data.csv"), index=False)

knio.output_tables[0] = knio.Table.from_pandas(df)

Nodes

Extensions

Links