Icon

kn_example_ml_regression_h2o_glm_parameter

H2O.ai - extract the summary and parameters from a MOJO model file

H2O.ai - extract the summary and parameters from a MOJO model file

# H2O.ai - extract the summary and parameters from a MOJO model file
import knime.scripting.io as knio
import numpy as np
import pandas as pd
import h2o
from h2o.estimators import H2OGeneralizedLinearEstimator
import seaborn as sns
import os


def _stringified_frame(mapping):
    """Return a one-column ("value") DataFrame of ``str(v)`` indexed by key."""
    as_text = {key: str(value) for key, value in mapping.items()}
    return pd.DataFrame.from_dict(as_text, orient="index", columns=["value"])


def _save_table(frame, output_dir, file_prefix, suffix):
    """Write *frame* to ``<prefix>_<suffix>.parquet`` and publish its name.

    The bare file name (not the full path) is stored in the flow variable
    ``vpath_<suffix>``.
    """
    file_name = f"{file_prefix}_{suffix}.parquet"
    frame.to_parquet(os.path.join(output_dir, file_name))
    knio.flow_variables[f"vpath_{suffix}"] = file_name


def process_mojo_file(mojo_file_path, output_dir, file_prefix):
    """Export the contents of an H2O MOJO model as parquet files.

    Loads the MOJO with ``h2o.import_mojo`` and writes one parquet file per
    section into ``output_dir``: ``params``, ``metrics`` and ``summary``
    always; ``coefficients``, ``varimp`` and ``scoring_history`` only when
    the model provides them.  For every file written, the flow variable
    ``vpath_<section>`` is set to the file name.

    Args:
        mojo_file_path: Path of the MOJO file to import.
        output_dir: Directory the parquet files are written to.
        file_prefix: Prefix used for every output file name.

    The H2O cluster is shut down when the function exits, including when
    one of the extraction steps raises.
    """
    # Initialize H2O outside the try block: if init fails there is no
    # cluster to shut down.
    h2o.init()
    try:
        # Load the MOJO file:
        model = h2o.import_mojo(mojo_file_path)

        # Model parameters; values are stringified so the single "value"
        # column has one uniform type.
        _save_table(
            _stringified_frame(model.actual_params),
            output_dir, file_prefix, "params",
        )

        # Model coefficients, if available.  A missing coefficients table
        # surfaces as KeyError; a None result is treated the same way.
        try:
            coefficients = model.coef()
        except KeyError:
            coefficients = None
        if coefficients is None:
            print("Model has no coefficients_table.")
        else:
            coefficients_df = pd.DataFrame.from_dict(
                coefficients, orient="index", columns=["value"]
            )
            _save_table(coefficients_df, output_dir, file_prefix, "coefficients")

        # Training metrics, stringified like the parameters.
        _save_table(
            _stringified_frame(model.training_model_metrics()),
            output_dir, file_prefix, "metrics",
        )

        # Model summary table.
        summary = model.summary()
        summary_df = pd.DataFrame(summary.cell_values)
        summary_df.columns = summary.col_header
        _save_table(summary_df, output_dir, file_prefix, "summary")

        # Variable importances, if the model exposes them.
        try:
            varimp = model.varimp()
        except AttributeError:
            print("Model has no variable importances.")
        else:
            varimp_df = pd.DataFrame(
                varimp,
                columns=[
                    "variable",
                    "relative_importance",
                    "scaled_importance",
                    "percentage",
                ],
            )
            _save_table(varimp_df, output_dir, file_prefix, "varimp")

        # Scoring history.  Depending on the H2O version the call returns
        # either a table object offering as_data_frame() or an already
        # converted pandas DataFrame; unconditionally calling
        # as_data_frame() on the latter raised AttributeError and the
        # history was silently never written.
        try:
            scoring_history = model.scoring_history()
        except AttributeError:
            scoring_history = None
        if scoring_history is not None and hasattr(scoring_history, "as_data_frame"):
            scoring_history = scoring_history.as_data_frame()
        if scoring_history is None:
            print("Model has no scoring history.")
        else:
            _save_table(scoring_history, output_dir, file_prefix, "scoring_history")
    finally:
        # Shut down the H2O cluster even if an extraction step failed:
        h2o.cluster().shutdown()


v_path_mojo_file = knio.flow_variables['File path']
v_output_path = knio.flow_variables['context.workflow.data-path']
v_prefix = knio.flow_variables['File name']

# Call the function
process_mojo_file(v_path_mojo_file, v_output_path, v_prefix)

# ---------------------------------------------------------------------------
# Text labels that accompanied the script on the page (presumably workflow
# annotations and node names), separated for readability:
#
#   locate and create /data/ folder with absolute paths
#   extract File Names
#   dataset_regression_80.parquet
#     https://www.kaggle.com/c/house-prices-advanced-regression-techniques/overview
#   dataset_regression_20.parquet
#     https://www.kaggle.com/c/house-prices-advanced-regression-techniques/overview
#   APPLY
#   my_glm_model.zip
#   Node 721
#   extract summary
#   py3_knime_h2o.yml in the /data/ subfolder
#     https://medium.com/p/2ac217792539
#   my_glm_model_metrics.parquet
#   my_glm_model_params.parquet
#   my_glm_model_summary.parquet
#   summary
#   my_glm_model_varimp.parquet
#   p_varimp, p_summary, p_metrics, p_params
#
#   Collect Local Metadata, URL to File Path, H2O Local Context,
#   Parquet Reader, Parquet Reader, Missing Value, Missing Value (Apply),
#   H2O Generalized Linear Model Learner (Regression), Table to H2O,
#   H2O Model to MOJO, H2O MOJO Writer, Path to URI, Variable to Table Row,
#   Python Script, Table Row to Variable, Conda Environment Propagation,
#   Parquet Reader, Parquet Reader, Parquet Reader, Transpose, Parquet Reader,
#   Create File/Folder Variables, Create File/Folder Variables,
#   Create File/Folder Variables, Create File/Folder Variables
# ---------------------------------------------------------------------------

Nodes

Extensions

Links