pytorch 笔记: 复现论文 Stochastic Weight Completion for Road Networks using Graph Convolutional Networks
1 理論部分
論文筆記:Stochastic Weight Completion for Road Networks using Graph Convolutional Networks_UQI-LIUWJ的博客-CSDN博客
2 导入库
import torch import torch.nn.functional as F import numpy as np import pandas as pd import os from torch_geometric.data import Data, DataLoader from torch_geometric.utils import normalized_cut from torch_geometric.nn import (ChebConv, graclus, GCNConv, max_pool, max_pool_x, global_mean_pool) from toolz.curried import *3 數(shù)據(jù)集處理
数据集来源是 Uber Movement 以及纽约的 OSM 地图数据
Uber Movement: Let's find smarter ways forward, together.
# ---- Experiment hyper-parameters -------------------------------------------
n_test = 3 * 24   # the last three days (72 hourly snapshots) form the test set
n_epochs = 5
batch_size = 1

# ---- Reproducibility and device --------------------------------------------
np.random.seed(123)
torch.manual_seed(123)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# ---- Raw inputs ------------------------------------------------------------
uberdir = "D:/"
nykjuly = os.path.join(uberdir, "movement-speeds-hourly-new-york-2019-7.csv.zip")

# Road network as node / edge tables.
# (Author's run: 4588 nodes and 9893 edges.)
nodes, edges = load_gdfs("data/newyork/")

# One month of hourly speed records, tagged with the id of the matching graph
# edge; records that match no edge are discarded.
# (Author's run: shape goes from (25365815, 13) to (2653457, 11).)
df = attach_edgeid(nodes, edges, pd.read_csv(nykjuly))

# Graph in which every road segment is a node and two segments are linked
# when they meet at a junction of the original road network.
dG = edge_topology_from_edges(edges)

# Per hour, 90% of the records are "observed" (model input) and the remaining
# 10% are "unobserved" (ground truth to be inferred).
obs, unobs = split_obs_unobs(df, ratio=0.9)

# Turn each frame into a list with one DataFrame per (month, day, hour).
# (Author's notes: 744 hourly frames for the month.)
obs = [hourly for (_, hourly) in obs.groupby(['month', 'day', 'hour'])]
unobs = [hourly for (_, hourly) in unobs.groupby(['month', 'day', 'hour'])]

# Section 3.1: load_gdfs
导入地图数据,获得点集和边集
# Load the map data.
def load_gdfs(datadir: str) -> Tuple[GeoDataFrame, GeoDataFrame]:
    """Load the node and edge GeoDataFrames of a saved road network.

    Reads ``graph.graphml`` from `datadir` (the file written by
    `save_graphml_from_places`) and converts it to two tables.

    Added / changed columns:
        nodes['osmid']  -- copy of the node index.
        edges['coords'] -- (x, y) centroid of every segment in EPSG:3395,
                           shifted so the minimum of each axis is 0.
        edges['osmid']  -- always a list, even for a single way id.
        edges['u'], edges['v'] -- end-point node ids taken from the edge index.
        edges['id']     -- running number 0..n-1, also installed as the index.

    Usage:
        nodes, edges = load_gdfs("data/newyork/")

    Returns:
        The ``(nodes, edges)`` pair.
    """
    graph_path = os.path.join(datadir, "graph.graphml")
    nodes, edges = ox.graph_to_gdfs(ox.load_graphml(graph_path), nodes=True, edges=True)
    nodes['osmid'] = nodes.index.values

    # Segment centroids after reprojection, as an (n_edges, 2) float array so
    # the per-axis minimum can be subtracted in one step.
    centroids = edges.geometry.to_crs(epsg=3395).centroid
    coords = np.array([[pt.x, pt.y] for pt in centroids])
    coords = coords - coords.min(axis=0)
    edges['coords'] = [tuple(row) for row in coords]

    # A segment may carry one way id or a list of them; normalise to a list.
    edges['osmid'] = edges.osmid.map(lambda x: x if isinstance(x, list) else [x])

    # The edge index holds (u, v, key) triples; expose u and v as columns.
    u, v, _ = zip(*edges.index)
    edges["u"] = u
    edges["v"] = v

    # Replace the index by a consecutive integer id, kept as a column too.
    edges['id'] = np.arange(edges.shape[0])
    edges.set_index('id', inplace=True, drop=False)

    print(f"There are {nodes.shape[0]} nodes and {edges.shape[0]} edges in the road networks.")
    return nodes, edges

# Author's note: this is similar to a "simplified" operation.
同时将 newyork 里面的 crs 坐标系转换成 uber 使用的坐标系
nodes 几乎没动
edges 加了一个 coords 条目,表示的是边的质心,同时 edges 的活跃列(索引)改为 id
nodes:
edges:?
?
?
3.2 attach_edgeid?
def attach_edgeid(nodes: "GeoDataFrame", edges: "GeoDataFrame", df: "DataFrame") -> "DataFrame":
    """Tag Uber speed records with the id of the graph edge they belong to.

    A record matches an edge when its (start node, end node, way id) triple
    equals the edge's ``(u, v, osmid)``; an edge that lists several way ids
    matches any of them. Records with no matching edge are dropped.

    The lookup is keyed on the full ``(u, v, osmid)`` triple. Keying on
    ``(u, v)`` alone lets parallel edges between the same two nodes (for
    example a surface road and an elevated road) overwrite one another, so
    records on all but the last such edge would be lost.

    Usage:
        mh = attach_edgeid(nodes, edges, df)

    Args:
        nodes: Node table with an ``osmid`` column.
        edges: Edge table with ``u``, ``v``, ``osmid`` (a list of way ids, or
            a single id) and ``id`` columns.
        df: One month of records with ``osm_start_node_id``,
            ``osm_end_node_id``, ``osm_way_id``, ``segment_id``,
            ``start_junction_id`` and ``end_junction_id`` columns.

    Returns:
        A filtered copy of `df` without the three segment/junction columns
        and with an integer ``edgeid`` column. `df` itself is not modified.
    """
    # Keep only records whose two end points are nodes of the graph.
    known_nodes = nodes.osmid
    in_graph = df.osm_start_node_id.isin(known_nodes) & df.osm_end_node_id.isin(known_nodes)
    sdf = df[in_graph].copy()

    # Columns that are not used downstream.
    sdf.drop(["segment_id", "start_junction_id", "end_junction_id"], axis=1, inplace=True)

    # (u, v, way id) -> edge id, one entry per way id carried by the edge.
    edgeidmap = {}
    for u, v, osmids, edgeid in zip(edges.u, edges.v, edges.osmid, edges.id):
        if not isinstance(osmids, (list, tuple, set)):
            osmids = [osmids]
        for osmid in osmids:
            edgeidmap[(u, v, osmid)] = edgeid

    # -1 marks "no such edge"; a plain loop avoids a slow row-wise apply and
    # also works when `sdf` is empty.
    triples = zip(sdf.osm_start_node_id, sdf.osm_end_node_id, sdf.osm_way_id)
    sdf['edgeid'] = pd.Series(
        [edgeidmap.get(triple, -1) for triple in triples],
        index=sdf.index,
        dtype="int64",
    )

    # Keep only the records that were matched to an edge.
    sdf = sdf[sdf.edgeid >= 0]
    return sdf

# Section 3.3: edge_topology_from_edges
def edge_topology_from_edges(edges: GeoDataFrame) -> Graph:
    """Construct the edge topology (road segments as nodes) from `edges`.

    Two segments are connected in the result when they share an end point in
    the road network. ``nx.line_graph()`` can construct the line graph
    directly from the original graph.

    Args:
        edges: GeoDataFrame returned by `load_gdfs`; the ``id``, ``u`` and
            ``v`` columns are used.

    Returns:
        G: An undirected graph whose node ids are the edge ids in `edges`.
        Every edge id is a node, including segments with no neighbour.
    """
    # List every segment once per end point: the first frame keys it by its
    # start node, the second (u and v swapped) by its end node.
    triple = pd.concat(
        [
            pd.DataFrame({'id': edges.id, 'u': edges.u, 'v': edges.v}),
            pd.DataFrame({'id': edges.id, 'u': edges.v, 'v': edges.u}),
        ],
        ignore_index=True,
    )

    # Grouping by `u` therefore gathers all segments incident to a junction,
    # whichever way they point. A second pass grouped by `v` would yield
    # exactly the same pairs, so it is not needed.
    pairs = []
    for (_, g) in triple.groupby('u'):
        pairs += [(a, b) for a in g.id for b in g.id if a != b]

    G = Graph()
    G.add_edges_from(pairs)
    # Segments that touch no other segment appear in no pair; add them as
    # isolated nodes so the node count always equals the segment count.
    # Nodes already present are left as they are.
    G.add_nodes_from(edges.id)
    return G

# Example of a single group from the author's run
# (columns: row label, id, u, v):
#         0     0  42421728  42432736
#         1     1  42421728  42435337
#         2     2  42421728  42421731
#      9898     5  42421728  42421731
#     12211  2318  42421728  42432736
#     12942  3049  42421728  42435337

# Section 3.4: split_obs_unobs
def split_dataframe(df: DataFrame, ratio: Optional[float]=0.9) -> Tuple[DataFrame, DataFrame]:
    """Randomly split a dataframe into two parts along the row dimension.

    Args:
        df: Frame to split.
        ratio: Fraction of rows for the first part; the row count is
            ``int(len(df) * ratio)``.

    Returns:
        ``(first, second)``: the first ``k`` rows of a random permutation and
        the remaining rows. Uses the global NumPy random state.
    """
    k = int(df.shape[0] * ratio)
    idx = np.random.permutation(df.shape[0])
    return df.iloc[idx[:k]], df.iloc[idx[k:]]


def split_obs_unobs(df: DataFrame, ratio: Optional[float]=0.9) -> Tuple[DataFrame, DataFrame]:
    """Split a one-month dataframe into observed and unobserved dataframes.

    The split is done separately for every (month, day, hour) group, so each
    hour contributes about `ratio` of its records to the observed part.

    Args:
        df: Records with ``month``, ``day`` and ``hour`` columns.
        ratio: Fraction of each hour's records placed in the observed part.

    Returns:
        trn: Observations for a fraction of road segments.
        tst: Ground truth for road segments to be inferred.
        Both are empty frames with the columns of `df` when `df` has no rows.
    """
    ## we should guarantee the results are invariant to calling order.
    # NOTE: this reseeds the global NumPy random state on every call.
    np.random.seed(123)
    parts = [split_dataframe(g, ratio=ratio)
             for (_, g) in df.groupby(['month', 'day', 'hour'])]
    if not parts:
        # pd.concat refuses an empty list; return two independent empty frames.
        return df.iloc[:0].copy(), df.iloc[:0].copy()
    trn = pd.concat([observed for (observed, _) in parts])
    tst = pd.concat([hidden for (_, hidden) in parts])
    return trn, tst

# Section 3.5: supplementary notes on the Uber Movement dataset
| len(osm_ids.osm_way_id) | 33320 |
| len(osm_ids.osm_start_node_id) | 58601 |
| len(osm_ids.osm_end_node_id) | 58605 |
只有 osm_way_id、osm_start_node_id、osm_end_node_id 加起来,才能唯一确定一个子路段
原因是,即使我们知道了 osm_start_node_id、osm_end_node_id,但因为可能有地面、高架等不同的重叠路段,所以 osm_way_id 可能会不同(比如上图,黄颜色的是高架,黑线是地面道路。画红色箭头的是两个高架匝道,所以相同的 osm_start_node_id、osm_end_node_id 可能分别对应了地面和高架)
4 dataloader
# One graph sample per hour. Everything except the last `n_test` hours is
# used for training; the last `n_test` hours are used for testing.
trn_list = [
    get_data(dG, seen, hidden)
    for (seen, hidden) in zip(obs[:-n_test], unobs[:-n_test])
]
tst_list = [
    get_data(dG, seen, hidden)
    for (seen, hidden) in zip(obs[-n_test:], unobs[-n_test:])
]
# Each element is a torch_geometric `Data` object. The author reports, for the
# first hour: Data(x=[9893, 1], edge_index=[2, 34637], y=[9893, 1]).
# The 9893 graph nodes are the road segments of the network (edges.shape[0]);
# two nodes are linked when their segments share a junction.

trn_loader = DataLoader(trn_list, batch_size=batch_size)
tst_loader = DataLoader(tst_list, batch_size=batch_size)

# Section 4.1: get_data
def get_x(df: DataFrame, num_nodes: int) -> torch.FloatTensor:
    """Get the pytorch geometric node-feature tensor from a speed dataframe.

    Args:
        df: Dataframe with ``edgeid`` and ``speed_mph_mean`` columns.
        num_nodes: Number of nodes (road segments) in the graph.

    Returns:
        x (num_nodes, 1): float32 tensor holding the mean speed of every
        segment listed in `df` and 0 for every other segment. Edge ids
        outside ``[0, num_nodes)`` are ignored. If an edge id occurs more
        than once, its last row wins.
    """
    ids = np.asarray(df.edgeid.values)
    speeds = np.asarray(df.speed_mph_mean.values, dtype=np.float32)

    x = np.zeros((num_nodes, 1), dtype=np.float32)
    if ids.size:
        ids = ids.astype(np.int64)
        # Mask out-of-range ids explicitly so that negative values are
        # ignored rather than wrapping around to the end of the array.
        valid = (ids >= 0) & (ids < num_nodes)
        # With repeated indices NumPy keeps the last assigned value.
        x[ids[valid], 0] = speeds[valid]
    return torch.from_numpy(x)


def get_data(G: "Graph", obs: DataFrame, unobs: DataFrame) -> "Data":
    """Build the graph sample for one hour.

    Args:
        G: Segment graph; its node count fixes the feature length.
        obs: Observed records of the hour (become the input ``x``).
        unobs: Unobserved records of the hour (become the target ``y``).

    Returns:
        ``Data(x, edge_index, y)`` where both ``x`` and ``y`` hold speeds for
        the segments present in their frame and 0 elsewhere.
    """
    # Per the author's notes, `get_edge_index` is a utils helper that turns
    # the edge set of G into a transposed tensor -- TODO confirm.
    edge_index = get_edge_index(G)
    x = get_x(obs, G.number_of_nodes())
    y = get_x(unobs, G.number_of_nodes())
    return Data(x=x, edge_index=edge_index, y=y)

# Section 5: the model
?
?
class ChebNet(torch.nn.Module):
    """Two ChebConv layers with graclus pooling, followed by two linear layers.

    The network maps one graph (one hour of partially observed speeds) to a
    vector with one predicted value per road segment.

    Args:
        num_features: Number of input features per node.
        num_nodes: Size of the output vector (number of road segments).
    """

    def __init__(self, num_features, num_nodes):
        super().__init__()
        # Chebyshev spectral graph convolutions with K=2:
        # num_features -> 32 -> 64 channels.
        self.conv1 = ChebConv(num_features, 32, 2)
        self.conv2 = ChebConv(32, 64, 2)
        # Fully connected head: 64 -> 128 -> num_nodes.
        self.fc1 = torch.nn.Linear(64, 128)
        self.fc2 = torch.nn.Linear(128, num_nodes)

    def forward(self, data):
        """Return predictions of shape (number of graphs in the batch, num_nodes).

        The shapes quoted below are the ones the author reports for the first
        graph; the pooled sizes come from graclus and may differ between runs.
        """
        # data: Batch(x=[9893, 1], edge_index=[2, 34637], y=[9893, 1],
        #             batch=[9893], ptr=[2])
        x = F.relu(self.conv1(data.x, data.edge_index))
        # x: [9893, 32] (conv1 outputs 32 channels)

        # graclus assigns every node to a cluster; `cluster` has one entry per node.
        cluster = graclus(data.edge_index, num_nodes=x.shape[0])
        # max_pool rebuilds a coarser graph from the cluster assignment.
        data = max_pool(cluster, Data(x=x, batch=data.batch, edge_index=data.edge_index))
        # data: roughly Batch(x=[5870, 32], edge_index=[2, 22026], batch=[5870])

        x = F.relu(self.conv2(data.x, data.edge_index))
        # x: roughly [5847, 64]

        cluster = graclus(data.edge_index, num_nodes=x.shape[0])
        # Second pooling step; only features and batch vector are needed now.
        x, batch = max_pool_x(cluster, x, data.batch)
        # x: roughly [3436, 64], batch: [3436]

        x = global_mean_pool(x, batch)            # [1, 64]
        x = F.relu(self.fc1(x))                   # [1, 128]
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)                           # [1, num_nodes]
        return x


# Instantiate only after the class exists, so the snippet runs top to bottom.
model = ChebNet(1, dG.number_of_nodes()).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Section 6: training the model
def train(epoch, optimizer, train_loader, model, device):
    """Run one training epoch and print the mean batch loss.

    The loss is the mean squared error between the prediction and the input
    speeds, taken only over entries whose input value is positive.

    Args:
        epoch: Epoch number, used only in the printed message.
        optimizer: Optimizer that updates `model`.
        train_loader: Iterable of batches exposing ``.to(device)`` and ``.x``.
        model: Network that is called on a batch.
        device: Device every batch is moved to.
    """
    model.train()
    losses = []
    for data in train_loader:
        data = data.to(device)
        xhat = model(data)  # predicted value for every node
        ## -> (batch_size, num_nodes)
        x = data.x.reshape(xhat.shape)
        nz = x > 0  # keep only the entries that were actually observed
        n_obs = nz.sum().item()
        if n_obs == 0:
            # Dividing by zero would give a NaN loss, which would also turn
            # the epoch average into NaN; skip such a batch.
            continue
        loss = F.mse_loss(xhat[nz], x[nz], reduction='sum') / n_obs
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        losses.append(loss.item())
    mean_loss = np.mean(losses) if losses else float('nan')
    print(f"Epoch is {epoch}, Training Loss is {mean_loss:.5f}")


# Run the epochs only after `train` is defined, so the snippet runs top to bottom.
for epoch in range(n_epochs):
    train(epoch, optimizer, trn_loader, model, device)

# Output reported by the author.
# NOTE(review): the log covers 15 epochs, so it presumably comes from a run
# with n_epochs = 15 rather than the 5 set earlier -- confirm.
#   Epoch is 0, Training Loss is 55.03807
#   Epoch is 1, Training Loss is 29.84954
#   Epoch is 2, Training Loss is 21.36361
#   Epoch is 3, Training Loss is 19.08718
#   Epoch is 4, Training Loss is 18.11195
#   Epoch is 5, Training Loss is 18.60411
#   Epoch is 6, Training Loss is 17.49593
#   Epoch is 7, Training Loss is 17.83597
#   Epoch is 8, Training Loss is 17.09360
#   Epoch is 9, Training Loss is 17.26834
#   Epoch is 10, Training Loss is 17.15905
#   Epoch is 11, Training Loss is 16.93761
#   Epoch is 12, Training Loss is 16.54925
#   Epoch is 13, Training Loss is 16.65559
#   Epoch is 14, Training Loss is 16.71426
#   Wall time: 12min 2s

# Summary
以上是生活随笔为你收集整理的 pytorch 笔记: 复现论文 Stochastic Weight Completion for Road Networks using Graph Convolutional Networks 的全部内容,希望文章能够帮你解决所遇到的问题。
- 上一篇: torch_geometric 笔记:g
- 下一篇: python 包介绍:osmnx