Tools
Download Gephi - https://gephi.org
DOT Format - https://en.wikipedia.org/wiki/DOT_(graph_description_language)
Python Packages
pip install networkx
pip install pygraphviz
Input
{"creation_datetime": "2016-09-21 22:35:26.870000",
"active_duration": 279740.0,
"loading_duration": 893.0,
"individual_id": "A1BF7465FD4ACD0B1D",
"duration": 279740.0,
"cookie_id": "d2284f777b084c1ba6bb9065a96bdf75",
"logic1": {"cub平台": 6, "信用卡": 2},
"logic2": {"cub平台": 6, "costco卡": 2},
"intention": {"ㄧ般購物": 8},
"logic": {"信用卡_costco卡": 2, "cub平台_cub平台": 6}
}
Code
import json
import gzip
import networkx as nx
from networkx.drawing.nx_agraph import write_dot
def luigi_run(filepath, output_path=None):
    """Build a cookie/product/intention graph from a gzipped log and write it as DOT.

    The file at ``filepath`` is opened with gzip. Its first line is treated
    as a header and skipped; every following non-blank line is parsed as one
    JSON object that must provide the keys ``cookie_id``, ``logic``
    (product -> count) and ``intention`` (intention -> count).

    For each record the cookie becomes a ``circle`` node, every ``logic`` key
    a ``triangle`` node and every ``intention`` key a ``box`` node. Each
    cookie-to-item edge carries a ``weight`` equal to the item's count divided
    by the record's total ``logic`` count; weights accumulate when the same
    pair shows up in several records.

    Args:
        filepath: Path of the gzip-compressed input file.
        output_path: Destination of the DOT file. When omitted, ``.dot`` is
            appended to ``filepath``.

    Raises:
        KeyError: If a record lacks ``cookie_id``, ``logic`` or ``intention``.
        ValueError: If a non-blank data line is not valid JSON.
    """
    if output_path is None:
        output_path = str(filepath) + ".dot"

    g = nx.Graph()
    with gzip.open(filepath, "rb") as in_file:
        # The first line is a header, not a record.
        next(in_file, None)
        for line in in_file:
            text = line.decode("utf-8").strip()
            if not text:
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue

            o = json.loads(text)
            cookie_id = o["cookie_id"]
            if cookie_id == "cookie_id":
                # Header-like row that slipped into the data section.
                continue

            products, intentions = o["logic"], o["intention"]
            total_count = sum(products.values())
            if not total_count:
                # Nothing to normalise against; dividing would raise
                # ZeroDivisionError, so the record is skipped.
                continue

            if not g.has_node(cookie_id):
                g.add_node(cookie_id, shape="circle")

            for shape, item in zip(["triangle", "box"], [products, intentions]):
                for k, v in item.items():
                    # NOTE(review): norm_str is not defined in the visible
                    # part of this file; presumably a label-normalising
                    # helper provided elsewhere - confirm.
                    k, v = norm_str(k), float(v) / total_count
                    # Only add missing nodes so an existing node keeps the
                    # shape it was first given.
                    if not g.has_node(k):
                        g.add_node(k, shape=shape)
                    if g.has_edge(cookie_id, k):
                        g[cookie_id][k]["weight"] += v
                    else:
                        g.add_weighted_edges_from([(cookie_id, k, v)])

    write_dot(g, output_path)
if __name__ == "__main__":
    # Script entry point: convert the sample daily cookie log into a graph.
    luigi_run(
        "cookie_2016-09-21.tsv.gz"
    )