Icon

kn_example_python_csv_import_force_strings

Import a complex CSV file and force all columns to be read as strings, using the bundled Python version.

Import a complex CSV file and force all columns to be read as strings, using the bundled Python version.

import knime.scripting.io as knioimport numpy as npimport pandas as pdimport pyarrow.parquet as pqvar_csv_file= knio.flow_variables['v_path_csv_file']var_parquet_file= knio.flow_variables['v_path_parquet_file']df = pd.read_csv(var_csv_file, dtype=str, sep = '¤', engine='python')df.to_parquet(var_parquet_file, compression='gzip')knio.output_tables[0] = knio.Table.from_pandas(df) import complex CSV file and force all columns as strings using the bundled python versionhttps://forum.knime.com/t/force-knime-to-read-csv-columns-as-strings/11327/10?u=mlauber71 import knime.scripting.io as knioimport numpy as npimport pandas as pdimport astimport pyarrow.parquet as pq# Create a sample CSV filecsv_data = '''col1;col2;col3;col4;col5;col6;col71.0;true;2020-01-01;1:00:00;cat1;434;{'a', 'b', 'c'}2.5;false;2020-02-01;2:00:00;cat2;554;{'d', 'e', 'f'}3.0;true;2020-03-01;3:00:00;cat3;677;{'g', 'h', 'i'}''' var_csv_file = knio.flow_variables['v_path_csv_file']var_parquet_file= knio.flow_variables['v_path_parquet_file']with open(var_csv_file, 'w') as f: f.write(csv_data)# Read the CSV file and specify the data types of the columnsdf = pd.read_csv(var_csv_file, dtype={'col1': 'float', 'col2': 'bool', 'col5': 'category', 'col6':'int64', 'col7': 'str'}, sep=';', parse_dates=['col3'])# Convert the col4 column to a timedelta objectdf['col4'] = pd.to_timedelta(df['col4'])# Convert the 'col3' column to a set data typedf['col7'] = df['col7'].apply(ast.literal_eval)# View the data types of the columnsprint(df.dtypes)# View the data in the DataFrameprint(df)df.to_parquet(var_parquet_file, compression='gzip')knio.output_tables[0] = knio.Table.from_pandas(df) test_1.csvseparator is ASCII 164"¤" the “flattened turtle”v_path_csv_fileforce importof all variables as stringtest_1.tablev_path_parquet_filetest_1.parquetv_path_*test_1.tablerow_idas RowIDv_path_csv_filetest_2.csv_path_parquet_filetest_2.parquetv_path_*force importof all variables as string Test Data Generator CSV Writer Java EditVariable 
(simple) collect meta data Collectionto String Python Script Table Writer Java EditVariable (simple) Parquet Reader String to Path(Variable) Table Reader RowID Java EditVariable (simple) Java EditVariable (simple) Parquet Reader String to Path(Variable) Python Script import knime.scripting.io as knioimport numpy as npimport pandas as pdimport pyarrow.parquet as pqvar_csv_file= knio.flow_variables['v_path_csv_file']var_parquet_file= knio.flow_variables['v_path_parquet_file']df = pd.read_csv(var_csv_file, dtype=str, sep = '¤', engine='python')df.to_parquet(var_parquet_file, compression='gzip')knio.output_tables[0] = knio.Table.from_pandas(df) import complex CSV file and force all columns as strings using the bundled python versionhttps://forum.knime.com/t/force-knime-to-read-csv-columns-as-strings/11327/10?u=mlauber71 import knime.scripting.io as knioimport numpy as npimport pandas as pdimport astimport pyarrow.parquet as pq# Create a sample CSV filecsv_data = '''col1;col2;col3;col4;col5;col6;col71.0;true;2020-01-01;1:00:00;cat1;434;{'a', 'b', 'c'}2.5;false;2020-02-01;2:00:00;cat2;554;{'d', 'e', 'f'}3.0;true;2020-03-01;3:00:00;cat3;677;{'g', 'h', 'i'}''' var_csv_file = knio.flow_variables['v_path_csv_file']var_parquet_file= knio.flow_variables['v_path_parquet_file']with open(var_csv_file, 'w') as f: f.write(csv_data)# Read the CSV file and specify the data types of the columnsdf = pd.read_csv(var_csv_file, dtype={'col1': 'float', 'col2': 'bool', 'col5': 'category', 'col6':'int64', 'col7': 'str'}, sep=';', parse_dates=['col3'])# Convert the col4 column to a timedelta objectdf['col4'] = pd.to_timedelta(df['col4'])# Convert the 'col3' column to a set data typedf['col7'] = df['col7'].apply(ast.literal_eval)# View the data types of the columnsprint(df.dtypes)# View the data in the DataFrameprint(df)df.to_parquet(var_parquet_file, compression='gzip')knio.output_tables[0] = knio.Table.from_pandas(df) test_1.csvseparator is ASCII 164"¤" the “flattened 
turtle”v_path_csv_fileforce importof all variables as stringtest_1.tablev_path_parquet_filetest_1.parquetv_path_*test_1.tablerow_idas RowIDv_path_csv_filetest_2.csv_path_parquet_filetest_2.parquetv_path_*force importof all variables as stringTest Data Generator CSV Writer Java EditVariable (simple) collect meta data Collectionto String Python Script Table Writer Java EditVariable (simple) Parquet Reader String to Path(Variable) Table Reader RowID Java EditVariable (simple) Java EditVariable (simple) Parquet Reader String to Path(Variable) Python Script

Nodes

Extensions

Links