Beruflich Dokumente
Kultur Dokumente
#!/usr/bin/python
'''
Relational Tag Cloud sketch
Copyright (c) 2007, Gabor Papp (gabor.papp at mndl hu)
All rights reserved.
'''
import urllib
import re
import sys
# --- del.icio.us endpoints -------------------------------------------------
# Base URL; tag paths are appended to it when item pages are fetched.
url_deli_host = 'http://del.icio.us/'
# Tag listing sorted by frequency.
url_deli_tags = 'http://del.icio.us/tag/?sort=freq'

# --- shared state filled in while the script runs ---------------------------
# Post key -> list of that post's tags that passed the main-tag filter.
item_tags = {}
# (a, b) node pair -> co-occurrence counter.
edges = {}
# Every node occurrence, duplicates included (counted later via nodes.count).
nodes = []
# Node list that edge endpoints are looked up in by index
# (presumably the de-duplicated nodes -- confirm where it is filled).
nodesu = []
# Sequence of entries whose [0] is printed as the label and whose [1] drives
# the font-size scaling (presumably (tag, count) pairs -- confirm).
nodesw = []
def deli_get_item_tags(tag_url, main_tags):
    """Download the items listed under one tag and record their main tags.

    Fetches ``url_deli_host + tag_url`` (requesting ``items_per_tag`` posts
    through the ``setcount`` query parameter), extracts every
    ``<li class="post" key="...">`` entry and, for each post key not yet
    present in the module-level ``item_tags`` dict, stores the list of that
    post's tags that also occur in ``main_tags``. Tags outside ``main_tags``
    (the unpopular ones) are filtered out.

    Args:
        tag_url: Tag path appended to ``url_deli_host``.
        main_tags: Collection of tag names to keep.

    Returns:
        None. ``item_tags`` is updated in place.
    """
    response = urllib.urlopen(url_deli_host + tag_url +
                              '?setcount=%d' % items_per_tag)
    try:
        items_page = response.read()
    finally:
        # Release the connection even when the read fails.
        response.close()

    # Raw strings keep the regex escapes literal; both patterns are loop
    # invariants, so they are defined once up front.
    post_pattern = r'<li class="post" key="(.+?)">(.+?)<\/li>'
    tag_pattern = r'<a class="tag" href=".+?">(.+?)</a>'

    for key, data in re.findall(post_pattern, items_page, re.DOTALL):
        if key in item_tags:
            # A post seen under an earlier tag keeps its first record.
            continue
        # item_tags is only mutated, never rebound, so no `global` is needed.
        item_tags[key] = [tag for tag in re.findall(tag_pattern, data)
                          if tag in main_tags]
def uniq(alist):
    """Return the distinct elements of *alist* as a list.

    Elements are returned in order of first appearance. The elements must
    be hashable.

    The loop is explicit so the result does not depend on ``map`` padding a
    shorter argument with ``None`` or on ``map`` being evaluated eagerly,
    and so the builtin ``set`` is not shadowed.

    Args:
        alist: Iterable of hashable items.

    Returns:
        A new list holding each distinct item once.
    """
    seen = {}
    result = []
    for item in alist:
        if item not in seen:
            seen[item] = None
            result.append(item)
    return result
# Tally tag co-occurrences: every pair of distinct entries of item_nodes is
# normalised to (smaller, larger) and counted in `edges`.
# NOTE(review): indentation was lost in this copy, and neither the enclosing
# `def` nor the definition of item_nodes is visible here; the nesting
# described is the one implied by statement order -- confirm against the
# original file.
# NOTE(review): the double loop visits both (a, b) and (b, a), so each
# unordered pair appears to be incremented twice per pass -- confirm that
# this is intended.
for a in item_nodes:
for b in item_nodes:
if a != b:
# Order the pair so (x, y) and (y, x) share a single dictionary key.
if (a < b):
edge = (a, b)
else:
edge = (b, a)
if edge not in edges:
edges[edge] = 1
else:
edges[edge] += 1
# Extend `nodes` with these entries; duplicates are kept on purpose, since
# nodes.count() is used later when edge weights are computed.
nodes += item_nodes
# NOTE(review): the enclosing `def` and the opening part of this output
# routine are not visible in this copy, and indentation was lost; the
# structure described below is the one implied by statement order.
# Emit one <node> element per entry of nodesw. The font size `w` is a linear
# interpolation between fontsize_min and fontsize_max based on where
# nodesw[i][1] lies between min_count and max_count.
# NOTE(review): max_count == min_count would make the divisor zero.
for i in range(len(nodesw)):
w = fontsize_min + \
(nodesw[i][1]-min_count)*(fontsize_max-fontsize_min)/float(max_count-
min_count)
print '\t\t\t<node id="%d" prop="%s" weight="%4.2f"/>' % (i, nodesw[i][0],
w)
# Emit an <edge> element for every pair whose relative frequency exceeds
# tag_relfreq_thres. The weight is the pair's count divided by the summed
# occurrence counts of its two endpoints in `nodes`.
for ((a, b), n) in edges.items():
weight = n/float(nodes.count(a)+nodes.count(b))
# Diagnostic trace on stderr: both endpoints with their counts, the pair
# count and the resulting weight.
sys.stderr.write('%s(%d)-%s(%d) %d %f\n' % (a, nodes.count(a), b,
nodes.count(b), n, weight))
if weight > tag_relfreq_thres:
# Endpoints are referenced by their position in nodesu.
print '\t\t\t<edge source="%d" dest="%d" />' % (nodesu.index(a),
nodesu.index(b))
# Close the XML data section and emit the item renderer and closing tags.
print '\t\t</stuff>'
print '\t</mx:XML>'
print '\t</adobe:dataProvider>'
print '\t<adobe:itemRenderer>'
print '\t\t<mx:Component>'
# NOTE(review): the next string literal is split across two lines in this
# copy (probably a line-wrap artifact); as written it is unterminated.
print '\t\t\t<mx:Label fontSize="{data.data.@weight}" text="{data.data.@prop}"
color="#ffffff"/>'
print '\t\t</mx:Component>'
print '\t</adobe:itemRenderer>'
print '</adobe:SpringGraph>'
print '</mx:Application>'
def main():
    """Fetch tag data, build the tag graph and print it.

    Reads the tag links with ``deli_read_tag_links()``, keeps at most
    ``number_of_tags`` of them, downloads the items of each tag through
    ``deli_get_item_tags`` (logging every URL to stderr), then calls
    ``make_graph()`` and ``print_springgraph()``.

    Each tag entry is expected to be a two-element (url, name) pair, as the
    two-way unpacking below requires.

    Raises:
        SystemExit: If no tag links are available, so the script stops with
            a clear message and a non-zero status instead of failing on the
            unpacking of an empty ``zip``.
    """
    # Slicing already clamps to the sequence length, so no explicit
    # min(len(tags), number_of_tags) is needed.
    tags = deli_read_tag_links()[:number_of_tags]
    if not tags:
        sys.exit('no tag links found; nothing to graph')

    tag_urls, main_tags = zip(*tags)
    for url in tag_urls:
        sys.stderr.write(url + '\n')
        deli_get_item_tags(url, main_tags)

    make_graph()
    print_springgraph()
# Script entry point. There is no `__name__` guard, so this also runs when
# the module is imported.
main()