Icon

kn_example_python_lists_sets_distance

Python - work with lists and sets - also: distances between sets and comparison

Python - work with lists and sets - also: distances between sets and comparison

# Workflow: Python - work with lists and sets - also: distances between sets
# and comparison
#
# Workflow annotations / labels as shown on the page:
#   - compare two sets 1
#   - compare two sets 2
#   - set of sets
#   - List Collection of Strings
#   - set of sets from List input as Collection of Strings
#   - check if the set came thru unharmed
#   - result_1.table, result_2.table, set_1.table
# Nodes on the canvas: Python Script (x4), Table Difference Finder,
# Table Writer (x3)
#
# Two distinct Python Script bodies are shown. Each one is meant to be pasted
# into its own Python Script node; they are listed one after the other here.


# ---------------------------------------------------------------------------
# Python Script, variant 1: hard-coded example sets
# Output 0: the sets with their IDs
# Output 1: pairwise Jaccard distance, intersection and both differences
# ---------------------------------------------------------------------------
import knime.scripting.io as knio
import numpy as np
import pandas as pd
from itertools import combinations

# Define a list of sets and their corresponding IDs
set_data = [
    {'ID': 'Set 1', 'Set': {'apple', 'banana', 'orange', 'pear'}},
    {'ID': 'Set 2', 'Set': {'banana', 'cherry', 'pear'}},
    {'ID': 'Set 3', 'Set': {'cherry', 'durian'}},
    {'ID': 'Set 4', 'Set': {'durian', 'elderberry', 'fig'}},
    {'ID': 'Set 5', 'Set': {'cherry', 'durian'}},
]

# Extract the sets and their IDs from the set_data list
sets = [d['Set'] for d in set_data]
set_ids = [d['ID'] for d in set_data]

# Create a Pandas dataframe to store the sets
set_df = pd.DataFrame(set_data)

# Calculate the Jaccard distance and difference between each pair of sets
data = {
    'Set 1 ID': [],
    'Set 1': [],
    'Set 2 ID': [],
    'Set 2': [],
    'Jaccard Distance': [],
    'Intersection': [],
    'Set 1 Difference': [],
    'Set 2 Difference': [],
}

# combinations() yields every unordered pair exactly once, in the same
# (0,1), (0,2), ..., (1,2), ... order as a nested index loop would.
for (s1_id, s1), (s2_id, s2) in combinations(zip(set_ids, sets), 2):
    intersection = s1 & s2
    union = s1 | s2
    # Two empty sets have an empty union; treat them as identical
    # (distance 0.0) instead of dividing by zero.
    jaccard_distance = 1 - len(intersection) / len(union) if union else 0.0
    s1_difference = s1 - s2
    s2_difference = s2 - s1

    data['Set 1 ID'].append(s1_id)
    data['Set 1'].append(s1)
    data['Set 2 ID'].append(s2_id)
    data['Set 2'].append(s2)
    data['Jaccard Distance'].append(jaccard_distance)
    # Empty results are stored as None (missing) rather than as empty sets.
    data['Intersection'].append(intersection if intersection else None)
    data['Set 1 Difference'].append(s1_difference if s1_difference else None)
    data['Set 2 Difference'].append(s2_difference if s2_difference else None)

df = pd.DataFrame(data)

# Calculate the average Jaccard distance for all sets
average_distance = df['Jaccard Distance'].mean()

# Print the dataframes and the average Jaccard distance
print("Set Data:")
print(set_df)
print("\nJaccard Distance Data:")
print(df)
print(f"\nAverage Jaccard Distance: {average_distance}")

knio.output_tables[0] = knio.Table.from_pandas(set_df)
knio.output_tables[1] = knio.Table.from_pandas(df)


# ---------------------------------------------------------------------------
# Python Script, variant 2: sets read from the first input table
# Reads the columns 'ID' and 'Set' from input table 0.
# Output 0: the sets with their IDs
# Output 1: pairwise Jaccard distance and intersection
# ---------------------------------------------------------------------------
import knime.scripting.io as knio
import numpy as np
import pandas as pd
from itertools import combinations

input_df = knio.input_tables[0].to_pandas()

# Convert the 'Set' column to sets
input_df['Set'] = input_df['Set'].apply(set)

# Convert sets to list format with IDs
set_data = [{'ID': row['ID'], 'Set': row['Set']} for _, row in input_df.iterrows()]

# Extract the sets and their IDs from the set_data list
sets = [d['Set'] for d in set_data]
set_ids = [d['ID'] for d in set_data]

# Create a Pandas dataframe to store the sets
set_df = pd.DataFrame(set_data)

# Calculate the Jaccard distance and intersection between each pair of sets
data = {
    'Set 1 ID': [],
    'Set 1': [],
    'Set 2 ID': [],
    'Set 2': [],
    'Jaccard Distance': [],
    'Intersection': [],
}

# combinations() yields every unordered pair exactly once, in the same
# (0,1), (0,2), ..., (1,2), ... order as a nested index loop would.
for (s1_id, s1), (s2_id, s2) in combinations(zip(set_ids, sets), 2):
    intersection = s1 & s2
    union = s1 | s2
    # Input rows may hold empty collections; two empty sets have an empty
    # union, so treat them as identical (distance 0.0) instead of dividing
    # by zero.
    jaccard_distance = 1 - len(intersection) / len(union) if union else 0.0

    data['Set 1 ID'].append(s1_id)
    data['Set 1'].append(s1)
    data['Set 2 ID'].append(s2_id)
    data['Set 2'].append(s2)
    data['Jaccard Distance'].append(jaccard_distance)
    # Empty results are stored as None (missing) rather than as empty sets.
    data['Intersection'].append(intersection if intersection else None)

df = pd.DataFrame(data)

# Calculate the average Jaccard distance for all sets
average_distance = df['Jaccard Distance'].mean()

# Print the dataframes and the average Jaccard distance
print("Set Data:")
print(set_df)
print("\nJaccard Distance Data:")
print(df)
print(f"\nAverage Jaccard Distance: {average_distance}")

knio.output_tables[0] = knio.Table.from_pandas(set_df)
knio.output_tables[1] = knio.Table.from_pandas(df)

Nodes

Extensions

Links