Tools

Download Gephi - https://gephi.org

DOT Format - https://en.wikipedia.org/wiki/DOT_(graph_description_language)

Python Packages

pip install networkx
pip install pygraphviz

Input

{"creation_datetime": "2016-09-21 22:35:26.870000",
 "active_duration": 279740.0,
 "loading_duration": 893.0,
 "individual_id": "A1BF7465FD4ACD0B1D",
 "duration": 279740.0,
 "cookie_id": "d2284f777b084c1ba6bb9065a96bdf75",
 "logic1": {"cub平台": 6, "信用卡": 2},
 "logic2": {"cub平台": 6, "costco卡": 2},
 "intention": {"ㄧ般購物": 8},
 "logic": {"信用卡_costco卡": 2, "cub平台_cub平台": 6}
}

Code

import json
import gzip
import networkx as nx

from networkx.drawing.nx_agraph import write_dot

def luigi_run(filepath, output_path=None):
    """Build a cookie/product/intention graph from a gzipped dump and write it as DOT.

    The first line of the file is treated as a header and skipped; every
    following non-blank line is parsed as one JSON record. For each record an
    edge is added from the record's ``cookie_id`` node to one node per key of
    ``record["logic"]`` (shape ``triangle``) and ``record["intention"]``
    (shape ``box``). Edge weights are the key's count divided by the sum of
    the ``logic`` counts, accumulated across records.

    Args:
        filepath: Path to the gzip-compressed input file.
        output_path: Where to write the DOT file. Defaults to
            ``filepath + ".dot"``.

    Returns:
        The ``networkx.Graph`` that was written.

    Raises:
        KeyError: If a record lacks ``cookie_id``, ``logic`` or ``intention``.
        ValueError: If a non-blank data line is not valid JSON.
    """
    if output_path is None:
        output_path = filepath + ".dot"

    g = nx.Graph()
    with gzip.open(filepath, "rb") as in_file:
        # The first line is a header, not a record.
        next(in_file, None)

        for line in in_file:
            text = line.decode("utf-8").strip()
            if not text:
                # Blank lines (e.g. a trailing newline) carry no record.
                continue

            o = json.loads(text)
            cookie_id = o["cookie_id"]
            if cookie_id == "cookie_id":
                # Guard against a header row repeated inside the data.
                continue

            products, intentions = o["logic"], o["intention"]
            total_count = sum(products.values())
            if not total_count:
                # Weights are normalised by the product total; with no
                # products there is nothing to normalise against.
                continue

            if not g.has_node(cookie_id):
                g.add_node(cookie_id, shape="circle")

            for shape, item in (("triangle", products), ("box", intentions)):
                for k, v in item.items():
                    # NOTE(review): norm_str is not defined in this file's
                    # visible source; it is assumed to be provided elsewhere.
                    k, v = norm_str(k), float(v) / total_count

                    # Only set the shape the first time a node is seen so a
                    # later record cannot overwrite it.
                    if not g.has_node(k):
                        g.add_node(k, shape=shape)

                    if g.has_edge(cookie_id, k):
                        g[cookie_id][k]["weight"] += v
                    else:
                        g.add_edge(cookie_id, k, weight=v)

    write_dot(g, output_path)
    return g

if __name__ == "__main__":
    # Script entry point: process the sample daily cookie dump.
    sample_dump = "cookie_2016-09-21.tsv.gz"
    luigi_run(sample_dump)

results matching ""

    No results matching ""