DZone Snippets is a public source code repository. Easily build up your personal collection of code snippets, categorize them with tags / keywords, and share them with the world

Snippets has posted 5883 posts at DZone. View Full User Profile

A Simple Python Class To Browse the Snippets Website (with BeautifulSoup)

09.09.2005
| 3142 views |
  • submit to reddit
        If you have patches or enhancements, you can mail me at my pseudonym at gmail.com and I'll update it.
(you should install the marvellous beautifulsoup module, http://www.crummy.com/software/BeautifulSoup/documentation.html)

the snippets.py file :
from BeautifulSoup import BeautifulSoup
import urllib

class Keyword: # top tags
    """A tag name paired with an integer ``nb``.

    Instances are built from the sidebar "top tags" table; ``nb`` is
    presumably the number of snippets carrying the tag (confirm against
    the site).
    """

    def __init__(self,tag,nb):
        # nb may be given as text; it is always stored as an int
        self.tag, self.nb = tag, int(nb)

    def __repr__(self):
        fields = (self.tag, self.nb)
        return "<Keyword '%s' : %d>" % fields

class Snippet:
    """A single snippet: its title, its code text and its list of tags."""

    def __init__(self,title,code,tags):
        self.title, self.code, self.tags = title, code, tags

    def __repr__(self):
        # the code itself is left out of the representation
        tagText = str(self.tags)
        return "<Snippet '%s' : tags %s>" % (self.title, tagText)

class Snippets:
    """Listing of the Snippets website for a given combination of tags.

    Creating an instance downloads the page found at ``urlForTags``
    followed by the requested tags, and parses it with BeautifulSoup.

    Attributes set by the constructor:
        tags     -- the list of tags that was requested
        keywords -- list of Keyword objects read from the sidebar table
        snippets -- list of Snippet objects, one per "post" div
    """
    urlForTags = "http://www.bigbold.com/snippets/tags"

    def __init__(self,l=None):
        """Download and parse the page for the tags in ``l``.

        l -- list of tag names (strings); omitted or None means no tag,
             i.e. the bare ``urlForTags`` page.

        Raises AssertionError when ``l`` is not a list, plus whatever
        ``urllib.urlopen`` / the parsing raise on failure.
        """
        # Use a fresh list per call: a shared ``[]`` default would be stored
        # in self.tags and any mutation would leak into later instances.
        if l is None:
            l = []
        url = self.__getUrlForTags(l)

        #load the url
        fu = urllib.urlopen(url)
        try:
            content = fu.read()
        finally:
            # close the connection even when read() fails
            fu.close()

        self.tags = l
        self.keywords,self.snippets = self.__extractContent(content)

    def __repr__(self):
        return "<Snippets for tags:%s>" % (str(self.tags))

    def __getUrlForTags(self, l ):
        """Return ``urlForTags`` with each tag of ``l`` appended as a path part."""
        # isinstance (rather than type(l)==list) also accepts list subclasses
        assert isinstance(l, list)
        return "/".join([Snippets.urlForTags] + l)

    def __extractContent(self,content):
        """Parse the page source and return the tuple (keywords, snippets)."""
        soup = BeautifulSoup( content )
        return self.__extractKeywords(soup),self.__extractSnippets(soup)

    def __extractKeywords(self,soup):
        """Return the Keyword list read from the table of the "sidebar" div."""
        tagTable=soup('div', {'id' : "sidebar"})[0].table
        keywords=[]
        for row in tagTable("tr"):
            td = row("td")

            # add this keyword
            try:
                # extract from the empty selection page "/tags"
                keywords.append( Keyword(td[1].span.a.string , td[0].string) )
            except TypeError:
                # extract from a selected selection page "/tag/something"
                # (there the cells are read one column further right)
                keywords.append( Keyword(td[2].span.a.string , td[1].string) )
        return keywords

    def __extractSnippets(self,soup):
        """Return one Snippet per div of class "post" found in ``soup``."""
        snippets=[]
        for post in soup('div', {'class' : "post"}):
            divs = post("div")

            # get title and tags
            title =  divs[0].h3.a.string # title
            tags = [link.string for link in divs[1]("a")][:-1] #don't get the user ;-)

            # get code of the snippet
            children = [child for child in divs[0]][1:]# zap the first (h3)
            code=""
            for node in children:
                try:
                    if node.name == "pre":
                        try:
                            code+=node.string
                        except TypeError:
                            # node.string could not be concatenated
                            # (presumably a <pre> without a single text
                            # child); such a block is skipped
                            pass
                except AttributeError:
                    # presumably a plain text node, which has no .name:
                    # transform this "out-pre-text" into a comment line
                    out = str(node).strip()
                    if out:
                        code+="#| "+out+"\n"

            # add this snippet
            snippets.append( Snippet(title,code,tags) )

        return snippets

and an example (all returned "strings" are in utf-8):
from snippets import Snippets

# Build the listing for the tags "python" and "xml"; the constructor
# downloads and parses the page, so this line needs network access.
s = Snippets(["python","xml"])
print s
print s.keywords # the "top tags" column
# one line per snippet found on the page
for i in s.snippets:
    print i
# NOTE: indexes start at 0, so index 6 is the seventh snippet; these two
# lines raise IndexError when the page holds fewer than seven snippets.
print s.snippets[6].title # the title of the snippet at index 6
print s.snippets[6].code  # the code of the snippet at index 6
    

Comments

Snippets Manager replied on Mon, 2012/05/07 - 2:14pm

thanks a lot, korakot !

Snippets Manager replied on Mon, 2012/05/07 - 2:14pm

Great code, manatlan!