score:0

Try this:

library(tidyverse)
library(stringr)
# Example input: one row per record, identified by `number`, with the client it
# belongs to and a node label. Each label begins with a step number, which the
# reshaping code below relies on.
df <- tibble(
  number = c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10),
  clientID = c(
    23969, 39199, 23595, 15867, 18295,
    18295, 18295, 18295, 15253, 27839
  ),
  node1 = c(
    "1 Community Services",
    "1 Youth Justice",
    "1 Mental Health",
    "1 Community Services",
    "3 Housing",
    "2 Housing",
    "1 Community Services",
    "4 Housing",
    "1 Housing",
    "1 Community Services"
  )
)

# Build one row per client holding the node recorded at each step
# (step1 .. step4).
#
# The step number is the run of digits at the start of `node1` (so a step of 10
# or more is not mistaken for step 1). Rows are widened into one `step_<n>`
# column per step and then collapsed per client: when a client has several
# distinct nodes for the same step, the first in sort order is kept. sort()
# drops NAs, so a step with no value for a client indexes an empty vector with
# [1] and yields NA.
df2 <- df %>%
  mutate(step = as.numeric(str_extract(node1, "^[0-9]+"))) %>%
  pivot_wider(
    names_from = step,
    values_from = node1,
    names_prefix = "step_"
  ) %>%
  group_by(clientID) %>%
  summarise(
    step1 = sort(unique(step_1))[1],
    step2 = sort(unique(step_2))[1],
    step3 = sort(unique(step_3))[1],
    step4 = sort(unique(step_4))[1]
  )

# Turn the per-client step columns into an edge list: every pair of consecutive
# steps (1->2, 2->3, 3->4) becomes a source/target row, and identical pairs are
# tallied into `clients`.
#
# Columns are selected by name rather than by position so the result does not
# depend on the column order of `df2`. count() returns an ungrouped data frame.
# Pairs with an NA endpoint (the client has no node at that step) are still
# counted here, exactly as before; drop them before plotting.
df3 <- bind_rows(
  select(df2, clientID, source = step1, target = step2),
  select(df2, clientID, source = step2, target = step3),
  select(df2, clientID, source = step3, target = step4)
) %>%
  count(source, target, name = "clients")

To use that result with networkD3, convert it into the links and nodes data frames that `sankeyNetwork()` expects:

# Keep only complete source -> target pairs: a row with an NA at either end is
# not a link. Any grouping left on df3 is removed first.
links <- dplyr::filter(
  dplyr::ungroup(df3),
  !(is.na(source) | is.na(target))
)

# Every distinct node name that appears at either end of a link, in sorted
# order, becomes one row of the nodes data frame.
nodeFactors <- factor(sort(union(links$source, links$target)))
nodes <- data.frame(name = nodeFactors)

# Replace each node name in the links with the position of that name in the
# nodes data frame, shifted down by one so the first node has index 0.
endpoint_cols <- c("source", "target")
links[endpoint_cols] <- lapply(
  links[endpoint_cols],
  function(node_name) match(node_name, levels(nodeFactors)) - 1
)

# Draw the Sankey diagram: `links` supplies the edges (zero-based node indices
# in `source`/`target`, edge weight in `clients`) and `nodes` supplies the
# labels via its `name` column.
library(networkD3)
sankeyNetwork(
  Links = links,
  Nodes = nodes,
  Source = "source",
  Target = "target",
  Value = "clients",
  NodeID = "name"
)

Related Query