上海地铁数据可视化
已部署到shinyapps.io,詳見SHMetro
0.配置環境和加載包
## encoding options(encoding = "UTF-8") ## for chines ## use getOption("encoding") to see if things were changedloc <- function(os, language = "english") {switch(language,english = ifelse(os == "Windows", "English_United States.1252", "en_US.UTF-8"),chinese = ifelse(os == "Windows", "Chinese", "zh_CN.utf-8")) } ## set locale Sys.setlocale(category = "LC_ALL", loc(Sys.info()[["sysname"]], "chinese"))##加載工作環境和所需包 setwd("/Users/jeevanyue/Rproject/map/SHMetro") library(data.table) library(bit64) library(dplyr) library(tidyr) library(scales) library(lubridate) #日期處理包 library(shiny) library(leaflet) library(lattice) library(plotly) library(chorddiag) #繪制chord1. 地鐵數據
#地鐵站進站數據和出站數據 shmetro_in <- fread("data/shmetro_in.csv",encoding="UTF-8") shmetro_out <- fread("data/shmetro_out.csv",encoding="UTF-8")## 進出地鐵站數據 shmetro_line_in_out <- fread("data/shmetro_line_in_out.csv",encoding="UTF-8")## 進出地鐵站關聯 in_out <- shmetro_line_in_out %>%spread(line_out,count) in_out[is.na(in_out)]<-0## 地鐵站經緯度 stations <- fread("data/stations.csv",encoding="UTF-8") stations <- stations %>% select(c(1:5)) %>%arrange(line,line_id) stations_no <- nrow(stations) for (i in 1:stations_no) {s <- stations$station[i]stations$lines[i] <- paste(stations[stations$station==s,]$line,sep="",collapse="/") }1.1 地鐵站經緯度
stations <- fread("data/stations.csv",encoding="UTF-8") stations <- stations %>% select(c(1:5)) %>%arrange(line,line_id)stations_no <- nrow(stations) for (i in 1:stations_no) {s <- stations$station[i]stations$lines[i] <- paste(stations[stations$station==s,]$line,sep="",collapse="/") } invisible(gc())1.2 交通卡交易數據
交通卡的交易信息有7個字段,分別是:卡號、交易日期、交易時間、站點名稱、行業名稱、交易金額、交易性質。
卡號:交通卡卡號
交易日期:日期格式yyyy-mm-dd
交易時間:時間個是hh:mm:ss
站點名稱:內容包括線路和站名,如:"1號線莘莊"
行業名稱:都是"地鐵"
交易金額:0和大于0的值,0表示進站,大于0的值表示出戰
交易性質:"優惠"和"非優惠"
1.3 處理異常值
## 對與stations地鐵站名不一致的trade數據進行處理 trade_metro[trade_metro$station=="淞浜路",]$station <- "淞濱路" trade_metro[trade_metro$station=="大木橋路 ",]$station <- "大木橋路" trade_metro[trade_metro$station=="上海大學站",]$station <- "上海大學"1.4 進/出站數據
## 進站數據 trade_metro_in <- trade_metro %>%filter(money==0) %>%select(card_id,"time_in"=time,"line_in"=line,"station_in"=station,"M5_in"=M5) ## 出站數據 trade_metro_out <- trade_metro %>%filter(money>0)%>%select(card_id,"time_out"=time,"line_out"=line,"station_out"=station,money,"M5_out"=M5)1.5 虛擬換乘
上海火車站為虛擬換乘,刪除半小時內3/4換1和1換3/4的數據
3/4換1的數據
## 3/4換1的數據,統計發現在上海火車站3/4號線出站以3號線名義出站 trade_metro_out_34 <- trade_metro_out %>%filter(station_out=='上海火車站') %>%filter(line_out==3 | line_out==4) trade_metro_in_1 <- trade_metro_in %>%filter(station_in=='上海火車站', line_in==1)## merge出站和進站的數據 trade_metro_out34_in1 <- merge(trade_metro_out_34,trade_metro_in_1,all.x=T) %>%mutate(duration=period_to_seconds(hms(time_in)) - period_to_seconds(hms(time_out))) %>%filter(duration>0,duration<=60*30)### 數據大了什么樣的數據都有,發現有幾個人在3/4號線出站后,半小時內在1號線進站多次#trade_metro_out34_in1 <- na.omit(trade_metro_out34_in1)## 根據卡號和進站時間,查詢最近的進站時間,作為本次進站時間 trade_metro_out34_in1 <- data.table(trade_metro_out34_in1) trade_metro_out34_in1[, duration_min := min(duration), by=list(card_id, M5_in)] trade_metro_out34_in1 <- trade_metro_out34_in1 %>%filter(duration==duration_min) %>%select(-duration_min)## 統計發現絕大部分人在10分鐘內完成換乘 #histogram(ceiling(trade_metro_out34_in1$duration/60))## rbind出站數據 trade_metro_out <- rbind(trade_metro_out, trade_metro_out34_in1[,c(1:6)]) ## 刪除全部重復的出站數據 trade_metro_out <- trade_metro_out[!(duplicated(trade_metro_out) | duplicated(trade_metro_out, fromLast = TRUE)), ]## rbind進站數據 trade_metro_in <- rbind(trade_metro_in, trade_metro_out34_in1[,c(1,7:10)]) ## 刪除全部重復的進站數據 trade_metro_in <- trade_metro_in[!(duplicated(trade_metro_in) | duplicated(trade_metro_in, fromLast = TRUE)), ]1換3/4的數據
## 1換3/4的數據,統計發現在3/4號線上海火車站以3號線名義進站 trade_metro_out_1 <- trade_metro_out %>%filter(station_out=='上海火車站',line_out==1) trade_metro_in_34 <- trade_metro_in %>%filter(station_in=='上海火車站') %>%filter(line_in==3 | line_in==4)## merge出站和進站的數據 trade_metro_out1_in34 <- merge(trade_metro_out_1,trade_metro_in_34,all.x=T, all.y=F) %>%mutate(duration=period_to_seconds(hms(time_in)) - period_to_seconds(hms(time_out))) %>%filter(duration>0,duration<=60*30)#trade_metro_out1_in34 <- na.omit(trade_metro_out1_in34)## 根據卡號和進站時間,查詢最近的進站時間,作為本次進站時間 trade_metro_out1_in34 <- data.table(trade_metro_out1_in34) trade_metro_out1_in34[, duration_min := min(duration), by=list(card_id, M5_in)] trade_metro_out1_in34 <- trade_metro_out1_in34 %>%filter(duration==duration_min) %>%select(-duration_min)## 統計發現絕大部分人在10分鐘內完成換乘 #histogram(ceiling(trade_metro_out1_in34$duration/60))## rbind出站數據 trade_metro_out <- rbind(trade_metro_out, trade_metro_out1_in34[,c(1:6)]) ## 刪除全部重復的出站數據 trade_metro_out <- trade_metro_out[!(duplicated(trade_metro_out) | duplicated(trade_metro_out, fromLast = TRUE)), ]## rbind進站數據 trade_metro_in <- rbind(trade_metro_in, trade_metro_out1_in34[,c(1,7:10)]) ## 刪除全部重復的進站數據 trade_metro_in <- trade_metro_in[!(duplicated(trade_metro_in) | duplicated(trade_metro_in, fromLast = TRUE)), ]1.6 地鐵站進站數據
根據消費金額為0,每5分鐘統計每站地鐵的進站人數
trade_metro_in_station <- trade_metro_in %>%group_by(station_in, M5_in) %>%summarise(count=n()) %>%select(station=station_in, M5=M5_in, count)#trade_metro_in_station <- na.omit(trade_metro_in_station) invisible(gc())## 合并地鐵站坐標 shmetro_in <- merge(trade_metro_in_station,stations,all.x=T, all.y=F) #rm(trade_metro_in_station)## 查看未匹配到的地鐵站 #l <- shmetro_in[is.na(shmetro_in$gps_lat),] #unique(l$station) #trade_metro_in_station[trade_metro_in_station$station=="淞浜路",]$station <- "淞濱路" #trade_metro_in_station[trade_metro_in_station$station=="大木橋路 ",]$station <- "大木橋路" #trade_metro_in_station[trade_metro_in_station$station=="上海大學站",]$station <- "上海大學" #stations[grepl("淞濱路", stations$station),]$station #trade_metro_in_station[grepl("淞浜路", trade_metro_in_station$station),]$station <- "淞濱路"#shmetro_in <- na.omit(shmetro_in) invisible(gc()) #write.csv(shmetro_in,"shmetro_in.csv",row.names = F,fileEncoding="UTF-8")1.7 地鐵站出站數據
根據消費金額大雨0,每5分鐘統計每站地鐵的出站人數
trade_metro_out_station <- trade_metro_out %>%group_by(station_out, M5_out) %>%summarise(count=n()) %>%select(station=station_out, M5=M5_out, count)#trade_metro_out_station <- na.omit(trade_metro_out_station) invisible(gc())## 合并地鐵站坐標 shmetro_out <- merge(trade_metro_out_station,stations,all.x=T, all.y=F) #rm(trade_metro_out_station)#shmetro_out <- na.omit(shmetro_out) invisible(gc()) #write.csv(shmetro_out,"shmetro_out.csv",row.names = F,fileEncoding="UTF-8")1.8 地鐵線路起始和終點
## merge進站和出站數據,并計算乘坐時間 trade_metro_in_out <- merge(trade_metro_in, trade_metro_out, all.x=T, all.y=F) %>%mutate(duration=period_to_seconds(hms(time_out)) - period_to_seconds(hms(time_in)), duration_M5=M5_out-M5_in) %>%filter(duration>0)#根據卡號和進站時間,查詢最近出站的時間,作為本次出站時間 trade_metro_in_out <- data.table(trade_metro_in_out) trade_metro_in_out[, duration_min := min(duration), by=list(card_id, M5_in)] trade_metro_in_out <- trade_metro_in_out %>%filter(duration==duration_min) %>%select(-duration_min)#統計進站線路A->出站線路B的筆數 shmetro_line_in_out <- trade_metro_in_out %>%group_by(line_in,line_out) %>%summarise(count=n())shmetro_line_in_out$line_in <- as.numeric(shmetro_line_in_out$line_in) shmetro_line_in_out$line_out <- as.numeric(shmetro_line_in_out$line_out)#排序 shmetro_line_in_out <- arrange(shmetro_line_in_out,line_in,line_out)#將出站線路數據轉換為屬性字段 in_out <- shmetro_line_in_out %>%spread(line_out,count)in_out[is.na(in_out)]<-0#write.csv(shmetro_line_in_out,"shmetro_line_in_out.csv",row.names = F,fileEncoding="UTF-8")2. 繪圖
2.1 相關數據及地圖
#地鐵顏色 lines_color <- data.frame("line"=c(1:13,16),"color"=c("#ED3229","#36B854","#FFD823","#320176","#823094","#CF047A","#F3560F","#008CC1","#91C5DB","#C7AFD3","#8C2222","#007a61","#ec91cc","#32D2CA"))pal <- colorFactor(as.character(lines_color$color), domain = stations$line)#輔助函數繪制線路 draw_line_add <- function(l_no,line_s_id=NULL){line_color <- lines_color[lines_color$line==l_no,]$colorline_data <- stations[stations$line==l_no,]if(is.null(line_s_id)){draw_lines <- Shanghai %>%addPolylines(lat=line_data$gps_lat,lng=line_data$gps_lon,color=line_color,weight=2)}else{draw_lines <- Shanghai %>%addPolylines(lat=line_data$gps_lat[line_s_id],lng=line_data$gps_lon[line_s_id],color=line_color,weight=2)}return(draw_lines) }## 上海線路地圖 Shanghai <- leaflet() %>% setView(lng = 121.60, lat = 31.20, zoom = 10) %>% addProviderTiles("CartoDB.Positron") %>%addLegend(position = "bottomleft",pal=pal,values = stations$line)for(l in unique(stations$line)){line_length <- nrow(stations[stations$line==l,])if(l==4){#由于4號線為環線,需將首尾相連Shanghai <- draw_line_add(l_no=l)Shanghai <- draw_line_add(l_no=l,line_s_id=c(1,line_length))}else if(l==10){#由于10號線在龍溪路站以后分為兩條線路,需分兩端繪制Shanghai <- draw_line_add(l_no=l,line_s_id=c(1:(line_length-3)))Shanghai <- draw_line_add(l_no=l,line_s_id=c(24,(line_length-2):line_length))}else if(l==11){#由于11號線在嘉定新城站以后分為兩條線路,需分兩端繪制Shanghai <- draw_line_add(l_no=l,line_s_id=c(1:(line_length-7)))Shanghai <- draw_line_add(l_no=l,line_s_id=c(28,(line_length-6):line_length))}else{Shanghai <- draw_line_add(l_no=l)} }2.2 chord圖數據
##繪制chord圖 metro_chord <- data.matrix(as.data.frame(in_out)[,c(2:15)]) haircolors <- in_out$line_in dimnames(metro_chord) <- list(have = haircolors,prefer = colnames(metro_chord))groupColors <- c("#ED3229","#36B854","#FFD823","#320176","#823094","#CF047A","#F3560F","#008CC1","#91C5DB","#C7AFD3","#8C2222","#007a61","#ec91cc","#32D2CA")#chorddiag(metro_chord, groupColors = groupColors, margin=50, showTicks=F, groupnamePadding = 5)2.3 圖形參數
b <- list(x = 0, y = 1,bgcolor = "#00FFFFFF") yax <- list(title = "",zeroline = FALSE,showline = FALSE,showticklabels = FALSE,showgrid = FALSE )xax <- list(title = "",titlefont = list(size = 8),tickangle = -20,color = "black" )2.4 UI 和 SERVER
ui <- shinyUI(navbarPage("SHMetro",tabPanel("進站流量",div(class="outer",#tags$style(type = "text/css", "html, body {width:100%;height:100%}"),tags$style(type = "text/css", ".outer {position: fixed; top: 41px; left: 0; right: 0; bottom: 0; overflow: hidden; padding: 0}"),leafletOutput("map", width = "100%", height = "100%"),absolutePanel(top = 10, right = 10,h4(textOutput("output_slider_time")),sliderInput("slider_time", "Time:",#min=as.POSIXct(min(filter(shmetro_in, M5>30)$M5)*5*60, origin = "2015-04-01", tz = "GMT"),#max=as.POSIXct(max(shmetro_in$M5)*5*60, origin = "2015-04-01", tz = "GMT"),#value=as.POSIXct(min(shmetro_in$M5)*5*60, origin = "2015-04-01", tz = "GMT"),min = as.POSIXct(5*60*60, origin = "2015-04-01", tz = "GMT"),max = as.POSIXct(24*60*60, origin = "2015-04-01", tz = "GMT"),value = as.POSIXct(5*60*60, origin = "2015-04-01", tz = "GMT"),step = 60*5,timeFormat = "%T",timezone = "GMT"),selectInput("select_line", "Line",c("All",lines_color$line)),h4("TOP 5"),plotlyOutput("in_top5",height = 200),checkboxInput("legend", "Show legend", TRUE)))),tabPanel("出站流量",div(class="outer",#tags$style(type = "text/css", "html, body {width:100%;height:100%}"),tags$style(type = "text/css", ".outer {position: fixed; top: 41px; left: 0; right: 0; bottom: 0; overflow: hidden; padding: 0}"),leafletOutput("map_out", width = "100%", height = "100%"),absolutePanel(top = 10, right = 10,h4(textOutput("output_slider_time_out")),sliderInput("slider_time_out", "Time:",#min=as.POSIXct(min(filter(shmetro_in, M5>30)$M5)*5*60, origin = "1960-01-01", tz = "GMT"),#max=as.POSIXct(max(shmetro_in$M5)*5*60, origin = "1960-01-01", tz = "GMT"),#value=as.POSIXct(min(shmetro_in$M5)*5*60, origin = "1960-01-01", tz = "GMT"),min = as.POSIXct(5*60*60, origin = "2015-04-01", tz = "GMT"),max = as.POSIXct(24*60*60, origin = "2015-04-01", tz = "GMT"),value = as.POSIXct(5*60*60, origin = "2015-04-01", tz = "GMT"),step = 60*5,timeFormat = "%T",timezone = "GMT"),selectInput("select_line_out", "Line",c("All",lines_color$line)),h4("TOP 5"),plotlyOutput("out_top5",height = 200),checkboxInput("legend_out", "Show legend", TRUE)))),tabPanel("線路關聯",div(class="outer",#tags$style(type = "text/css", "html, body {width:100%;height:100%}"),tags$style(type = "text/css", ".outer {position: fixed; top: 41px; left: 0; right: 0; bottom: 0; overflow: hidden; padding: 0}"),chorddiagOutput("line_chord", width = "100%",height="100%")))) )server <- shinyServer(function(input, output, session) {## 進站流量統計# Reactive expression for the data subsetted to what the user selectedfilteredData <- reactive({if(input$select_line=="All"){shmetro_in %>%filter(M5==ceiling(period_to_seconds(hms(format(input$slider_time,"%H:%M:%S")))/300))}else{shmetro_in %>%filter(M5==ceiling(period_to_seconds(hms(format(input$slider_time,"%H:%M:%S")))/300),line==as.numeric(input$select_line))}})stations_in_top5 <- reactive({filteredData() %>%group_by(station) %>%summarise(count=sum(count),line=min(line)) %>%arrange(desc(count)) %>%head(5) %>%as.data.frame()})## timeoutput$output_slider_time <- renderText({paste0("Time: ", format(input$slider_time,"%H:%M:%S"))})output$map <- renderLeaflet({Shanghai %>%addCircles(stations$gps_lon, stations$gps_lat,color = pal(stations$line), radius=1,popup = paste(stations$station,stations$lines),fillOpacity = 1,stroke = FALSE) %>%clearMarkerClusters() %>%clearMarkers()})observe({data_in_circle <- data.table(filteredData())[, count := sum(count), by=list(station, M5)] %>%arrange(count)leafletProxy("map", data = data_in_circle) %>%clearMarkerClusters() %>%clearMarkers() %>%addCircleMarkers(data_in_circle$gps_lon,data_in_circle$gps_lat, color = pal(data_in_circle$line), fillOpacity = 0.5,stroke = FALSE, popup=paste(data_in_circle$station,data_in_circle$line,data_in_circle$count,sep=","), radius=(data_in_circle$count)^(1/2.5))})# top5output$in_top5 <- renderPlotly({# If no stations_in_top5 are in view, don't plotif (nrow(stations_in_top5()) == 0)return(NULL)plot_ly(stations_in_top5(),x = stations_in_top5()$station,y = stations_in_top5()$count,type = "bar",marker = list(color = pal(stations_in_top5()$line)),bgcolor = "#00FFFFFF") %>%layout(showlegend=FALSE,yaxis=yax,xaxis=xax,plot_bgcolor='#00FFFFFF',paper_bgcolor='#00FFFFFF')})# Use a separate observer to recreate the legend as needed.observe({proxy <- leafletProxy("map")# Remove any existing legend, and only if the legend is# enabled, create a new one.proxy %>% clearControls()if (input$legend) {proxy %>% addLegend(position = "bottomleft",pal=pal,values = stations$line)}})## 出站流量統計# Reactive expression for the data subsetted to what the user selectedfilteredData_out <- reactive({if(input$select_line_out=="All"){shmetro_out %>%filter(M5==ceiling(period_to_seconds(hms(format(input$slider_time_out,"%H:%M:%S")))/300))}else{shmetro_in %>%filter(M5==ceiling(period_to_seconds(hms(format(input$slider_time_out,"%H:%M:%S")))/300),line==as.numeric(input$select_line_out))}})stations_out_top5 <- reactive({filteredData_out() %>%group_by(station) %>%summarise(count=sum(count),line=min(line)) %>%arrange(desc(count)) %>%head(5) %>%as.data.frame()})## timeoutput$output_slider_time_out <- renderText({paste0("Time: ", format(input$slider_time_out,"%H:%M:%S"))})output$map_out <- renderLeaflet({Shanghai %>%addCircles(stations$gps_lon, stations$gps_lat,color = pal(stations$line), radius=1,popup = paste(stations$station,stations$lines),fillOpacity = 1,stroke = FALSE) %>%clearMarkerClusters() %>%clearMarkers()})observe({data_out_circle <- data.table(filteredData_out())[, count := sum(count), by=list(station, M5)] %>%arrange(count)leafletProxy("map_out", data = filteredData_out()) %>%clearMarkerClusters() %>%clearMarkers() %>%addCircleMarkers(data_out_circle$gps_lon, data_out_circle$gps_lat, color = pal(data_out_circle$line),fillOpacity = 0.5,stroke = FALSE, popup=paste(data_out_circle$station,data_out_circle$line,data_out_circle$count,sep=","), radius=(data_out_circle$count)^(1/2.5))})# top5output$out_top5 <- renderPlotly({# If no stations_in_top5 are in view, don't plotif (nrow(stations_out_top5()) == 0)return(NULL)plot_ly(stations_out_top5(),x = stations_out_top5()$station,y = stations_out_top5()$count,type = "bar",marker = list(color = pal(stations_out_top5()$line)),bgcolor = "#00FFFFFF") %>%layout(showlegend=FALSE,yaxis=yax,xaxis=xax,plot_bgcolor='#00FFFFFF',paper_bgcolor='#00FFFFFF')})# Use a separate observer to recreate the legend as needed.observe({proxy <- leafletProxy("map_out")# Remove any existing legend, and only if the legend is# enabled, create a new one.proxy %>% clearControls()if (input$legend_out) {proxy %>% addLegend(position = "bottomleft",pal=pal,values = stations$line)}})## 線路關聯output$line_chord <- renderChorddiag({chorddiag(metro_chord, groupColors = groupColors, showTicks=F, groupnamePadding = 5)}) })2.5 運行shinyApp
shinyApp(ui = ui,server = server)進站流量
出站流量
進出地鐵
總結
- 上一篇: Hql中使用in参数
- 下一篇: 深入理解脚本化CSS系列第五篇——动态样