Icon

kn_forum_46899_python_type_data

Python script executing

Python script executing

import knime_io as knio
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq

# KNIME "Python Script (Labs)" node body.
#
# Reads the first input table, builds a (Creation date, Question) -> Title
# lookup from selected rows, joins it back onto every row, and reshapes the
# result into one output table with the columns
#   Creation date, SubNo, Type, Question, Q_Title, Matrix, M_title,
#   Choice, C_Title
# (plus the former index, which reset_index() turns into a column).

# Substrings removed from every title in the lookup before it is joined back.
replace_list = ['*', 'QSC5 ']


def _strip_tokens(title):
    """Return *title* with every entry of ``replace_list`` removed.

    Non-string values (e.g. NaN for a missing title) are returned unchanged
    instead of raising ``AttributeError``.
    """
    if not isinstance(title, str):
        return title
    for token in replace_list:
        title = title.replace(token, '')
    return title


def _question_stem(sub_no):
    """Return the part of *sub_no* before its last ``_`` and before any ``[``.

    Non-string values (rows that are still missing after the forward fill)
    are returned unchanged instead of raising ``AttributeError``.
    """
    if not isinstance(sub_no, str):
        return sub_no
    return sub_no.rsplit('_', 1)[0].split('[')[0]


# read data
# Go through Arrow so date columns arrive as datetime64 rather than Python
# date objects (date_as_object=False), then normalise 'Creation date'.
table = knio.input_tables[0].to_pyarrow()
df = table.to_pandas(date_as_object=False)
df['Creation date'] = pd.to_datetime(df['Creation date'])

# Title lookup: rows that carry a Type and either have no Column value or are
# of Type 'S'.  It is taken BEFORE 'Question' is forward-filled below, so it
# only contains rows whose own 'Question' cell is populated or missing as-is.
# NOTE(review): a row with Type 'S' and no Column matches both selections and
# therefore appears twice in the lookup - confirm whether that is intended.
has_type = df['Type'].notna()
title_columns = ['Creation date', 'Title', 'Question']
titles_without_column = df.loc[has_type & df['Column'].isna(), title_columns]
titles_of_type_s = df.loc[has_type & df['Type'].isin(['S']), title_columns]
title_lookup = pd.concat([titles_without_column, titles_of_type_s])
title_lookup['Title'] = title_lookup['Title'].map(_strip_tokens)

# Propagate 'Question' and 'Type' downwards into the rows that leave them
# empty.  Series.ffill() replaces the deprecated fillna(method='ffill').
df['Question'] = df['Question'].ffill()
df['Type'] = df['Type'].ffill()

# Keep the full identifier in 'SubNo'; 'Question' is reduced to its stem so
# that it matches the 'Question' values stored in the lookup.
df['SubNo'] = df['Question']
df['Question'] = df['Question'].map(_question_stem)

# Both frames have a 'Title' column, so the merge yields 'Title_x' (the row's
# own title) and 'Title_y' (the title found in the lookup).
merged = df.merge(title_lookup, how='left', on=['Question', 'Creation date'])

# Rows with a CtgNo whose Type is not 'SL'.
non_sl_rows = merged[merged['CtgNo'].notna() & (merged['Type'] != 'SL')]

# 'SL' rows that have a Column value; their CtgNo/title become Matrix/M_title.
# NOTE: deriving CtgNo from the SubNo suffix
# (SubNo.rsplit('_', 1)[1]) raised an error for the original author and is
# deliberately left out.
sl_column_rows = merged[(merged['Type'] == 'SL') & merged['Column'].notna()]
sl_column_rows = sl_column_rows.rename(
    columns={'CtgNo': 'Matrix', 'Title_x': 'M_title'}
)

# 'SL' rows without a Column value for which the lookup supplied a title.
sl_title_rows = merged[
    (merged['Type'] == 'SL')
    & merged['Column'].isna()
    & merged['Title_y'].notna()
]
sl_title_rows = sl_title_rows.drop(
    ['Column', 'Question', 'Type', 'Title_y'], axis=1
)

# Attach those rows to the 'SL' column rows sharing SubNo and Creation date.
sl_combined = sl_column_rows.merge(
    sl_title_rows, how='left', on=['SubNo', 'Creation date']
)

result = pd.concat([sl_combined, non_sl_rows])
result = result.rename(
    columns={'Title_y': 'Q_Title', 'Title_x': 'C_Title', 'CtgNo': 'Choice'}
)
result = result[
    [
        'Creation date',
        'SubNo',
        'Type',
        'Question',
        'Q_Title',
        'Matrix',
        'M_title',
        'Choice',
        'C_Title',
    ]
]
# The concat leaves repeated index labels; reset_index() replaces them with a
# fresh 0..n-1 index and keeps the old labels as an 'index' column.
result = result.reset_index()

knio.output_tables[0] = knio.write_table(result)
https://forum.knime.com/t/error-upon-executing-python-script-rows-can-only-be-sliced-by-slice-objects-but-got-class-type/46899/3?u=mlauber71 export_file.parquet file exported from Jupyter Notebook /data/kn_forum_46899_python_type_data.ipynb input and code.xlsx Node 3 Parquet Reader Excel Reader Python Script(Labs) import knime_io as knioimport pandas as pdimport pyarrow as paimport pyarrow.parquet as pqreplace_list = ['*', 'QSC5 ']#read datatable = knio.input_tables[0].to_pyarrow()df = table.to_pandas(date_as_object=False)# df['Creation date'] = pd.to_datetime(df['Creation date'] , dayfirst=True)df['Creation date'] = pd.to_datetime(df['Creation date'])# df = knio.input_tables[0].to_pandas()# g = df.columns.to_series().groupby(df.dtypes).groups# print(g)title_dict_1 = df[~df['Type'].isna() & df['Column'].isna()][['Creation date', 'Title','Question']]title_dict_2 = df[~df['Type'].isna() & (df['Type'].isin(['S']))][['Creation date','Title','Question']]title_dict = pd.concat([title_dict_1, title_dict_2])for str_x in replace_list: title_dict['Title'] = title_dict['Title'].apply(lambda x:x.replace(str_x,''))df['Question'] = df['Question'].fillna(method = 'ffill')df['Type'] = df['Type'].fillna(method = 'ffill')df['SubNo'] = df['Question']df['Question'] = df['Question'].apply(lambda x:x.rsplit('_',1)[0].split('[')[0])df2 = df.merge(title_dict, how = 'left', on = ['Question','Creation date'] ) df3=df2[~df2['CtgNo'].isna() & (df2['Type'] != 'SL')]df4 = df2[(df2['Type'] == 'SL') & ~df2['Column'].isna()]# this gives an error message so I left it out# df4['CtgNo'] = df4['SubNo'].apply(lambda x:x.rsplit('_',1)[1])df4 = df4.rename(columns={'CtgNo':'Matrix', 'Title_x':'M_title'})df5 = df2[(df2['Type'] == 'SL') & (df2['Column'].isna()) & (~df2['Title_y'].isna())]df5 = df5.drop(['Column', 'Question', 'Type', 'Title_y'], axis=1)df6 = df4.merge(df5, how='left', on = ['SubNo', 'Creation date'])df7 = pd.concat([df6,df3])df7 = df7.rename(columns = {'Title_y':'Q_Title', 'Title_x':'C_Title', 
'CtgNo':'Choice'})df7 = df7[['Creation date', 'SubNo', 'Type', 'Question', 'Q_Title','Matrix','M_title', 'Choice', 'C_Title']]df7.reset_index(inplace=True)# g = df7.columns.to_series().groupby(df7.dtypes).groups# print(g)knio.output_tables[0] = knio.write_table(df7) https://forum.knime.com/t/error-upon-executing-python-script-rows-can-only-be-sliced-by-slice-objects-but-got-class-type/46899/3?u=mlauber71 export_file.parquet file exported from Jupyter Notebook /data/kn_forum_46899_python_type_data.ipynb input and code.xlsx Node 3 Parquet Reader Excel Reader Python Script(Labs)

Nodes

Extensions

Links