from htmllib import HTMLParser
from formatter import NullFormatter
import string

class MetaParser(HTMLParser):

    def __init__(self):
	HTMLParser.__init__(self, NullFormatter())
	self.meta_dict = {}

    def do_meta(self, attrs):
	# this method is called for META tags

	name = content = None
	
	# attrs is a list of 2-tuples
	for k, v in attrs:
	    if k == "name":
		name = string.lower(v)
	    elif k == "content":
		content = v

	if name and content:
	    self.meta_dict[name] = content

    def getmeta(file):
	# extract META tags from an HTML document

	p = MetaParser()
	f = open(file)

	while 1:
	    s = fp.read(10000)
	    if not s:
		break
	    p.feed(s)

	p.close()

	# the title tag is extracted by the base class
	if p.title:
	    p.meta_dict["title"] = p.title

	return p.meta_dict