Icon

kn_example_r_docx_import_table

Read tables from Word documents (.docx) and import them into KNIME.
https://forum.knime.com/t/how-to-import-tables-from-docx-documents-via-r-snippet/19284/4?u=mlauber71

Read tables from Word documents (.docx) and import them into KNIME. In this example we have a changing table structure which is captured by Loop End.

workpath_r <- knime.flow.in[["context.workflow.absolute-path"]]
workspace_name <- paste0(workpath_r, "/workspace_r.RData")
dataobject_name <- paste0(workpath_r, "/datalist_r.rds")
setwd(workpath_r) # Set work directory
library(docxtractr)
v_documnet_to_read <- c(knime.flow.in[["v_kn_documnet_to_read"]])
real_world <- read_docx(v_documnet_to_read)
docx_tbl_count(real_world)
tbls <- docx_extract_all_tbls(real_world)
no_tbls <- length(tbls)
docx_describe_tbls(real_world)
docx_extract_all_tbls(real_world, guess_header = TRUE, preserve = FALSE, trim = TRUE)
# https://stackoverflow.com/questions/29402528/append-data-frames-together-in-a-for-loop/29419402
datalist = list()
for (i in 1:no_tbls) {
  # ... make some data
  df <- docx_extract_tbl(real_world, i, header=TRUE)
  df$document <- v_documnet_to_read # add the document name
  df$table_no <- i # maybe you want to keep track of which iteration/table produced it?
  datalist[[i]] <- df # add it to your list
}
# save and load the working environment
# save.image(workspace_name)
# load(workspace_name)
saveRDS(datalist, dataobject_name)
# big_data = do.call(rbind, datalist)
knime.out <- data.frame("no_tables"=no_tbls )

workpath_r <- knime.flow.in[["context.workflow.absolute-path"]]
dataobject_name <- paste0(workpath_r, "/datalist_r.rds")
# workspace_name <- paste0(workpath_r, "/workspace_r.RData")
# load(workspace_name)
datalist <- readRDS(dataobject_name)
v_no_to_fetch <- knime.in$"v_no_to_fetch"
knime.out <- as.data.frame(datalist[[v_no_to_fetch]])

import all tables into a tibble and store them as .RDS file
START
END
collect structure
fetch table from tibble object
v_kn_documnet_to_read: which docx document to read
v_no_to_fetch: iterate the document no to fetch

R Source (Table)
collect meta data
Table Row to Variable
Counting Loop Start
Loop End
R Snippet
String Widget
Math Formula

https://forum.knime.com/t/how-to-import-tables-from-docx-documents-via-r-snippet/19284/4?u=mlauber71

Read tables from Word documents (.docx) and import them into KNIME. In this example we have a changing table structure which is captured by Loop End.

workpath_r <- knime.flow.in[["context.workflow.absolute-path"]]
workspace_name <- paste0(workpath_r, "/workspace_r.RData")
dataobject_name <- paste0(workpath_r, "/datalist_r.rds")
setwd(workpath_r) # Set work directory
library(docxtractr)
v_documnet_to_read <- c(knime.flow.in[["v_kn_documnet_to_read"]])
real_world <- read_docx(v_documnet_to_read)
docx_tbl_count(real_world)
tbls <- docx_extract_all_tbls(real_world)
no_tbls <- length(tbls)
docx_describe_tbls(real_world)
docx_extract_all_tbls(real_world, guess_header = TRUE, preserve = FALSE, trim = TRUE)
# https://stackoverflow.com/questions/29402528/append-data-frames-together-in-a-for-loop/29419402
datalist = list()
for (i in 1:no_tbls) {
  # ... make some data
  df <- docx_extract_tbl(real_world, i, header=TRUE)
  df$document <- v_documnet_to_read # add the document name
  df$table_no <- i # maybe you want to keep track of which iteration/table produced it?
  datalist[[i]] <- df # add it to your list
}
# save and load the working environment
# save.image(workspace_name)
# load(workspace_name)
saveRDS(datalist, dataobject_name)
# big_data = do.call(rbind, datalist)
knime.out <- data.frame("no_tables"=no_tbls )

workpath_r <- knime.flow.in[["context.workflow.absolute-path"]]
dataobject_name <- paste0(workpath_r, "/datalist_r.rds")
# workspace_name <- paste0(workpath_r, "/workspace_r.RData")
# load(workspace_name)
datalist <- readRDS(dataobject_name)
v_no_to_fetch <- knime.in$"v_no_to_fetch"
knime.out <- as.data.frame(datalist[[v_no_to_fetch]])

import all tables into a tibble and store them as .RDS file
START
END
collect structure
fetch table from tibble object
v_kn_documnet_to_read: which docx document to read
v_no_to_fetch: iterate the document no to fetch

R Source (Table)
collect meta data
Table Row to Variable
Counting Loop Start
Loop End
R Snippet
String Widget
Math Formula

Nodes

Extensions

Links