Installing Beautiful Soup
$ apt-get install python-bs4 (for Python 2)
$ apt-get install python3-bs4 (for Python 3)
OR, to install with pip instead:
sudo apt-get install python-setuptools python-dev build-essential
sudo easy_install pip
sudo pip install --upgrade virtualenv
pip install beautifulsoup4
Example 1
Example 2
$ apt-get install python-bs4 (for Python 2)
$ apt-get install python3-bs4 (for Python 3)
OR, to install with pip instead:
sudo apt-get install python-setuptools python-dev build-essential
sudo easy_install pip
sudo pip install --upgrade virtualenv
pip install beautifulsoup4
Example 1
# Example 1: navigate a small XML-like snippet by tag name.
from bs4 import BeautifulSoup

# Two sibling <type> elements that differ only in their "foobar" attribute.
x = """<foo>
<bar>
<type foobar="1"/>
<type foobar="2"/>
</bar>
</foo>"""

# Name the parser explicitly. Without it Beautiful Soup picks whichever
# parser happens to be installed (and warns about it), so the resulting tree
# can differ between machines. "html.parser" ships with Python itself.
y = BeautifulSoup(x, "html.parser")

# Dotted access walks down to the FIRST descendant with that tag name;
# indexing a tag with a string reads one of its attributes.
print(y.foo.bar.type["foobar"])
# 1

# find_all() returns every match (findAll is only the legacy BS3 spelling).
types = y.foo.bar.find_all("type")
print(types)
# [<type foobar="1"></type>, <type foobar="2"></type>]

# The result is list-like, so individual matches are reached by index.
print(types[0]["foobar"])
# 1
print(types[1]["foobar"])
# 2
Example 2
# Example 2: pull title, author and Dewey number out of a MARC XML record.
#
# Requires the third-party html5lib parser in addition to Beautiful Soup
# (pip install html5lib).
# NOTE: when run under Python 2, the file also needs a
# "# -*- coding: utf-8 -*-" line at the very top because of the Arabic text.
from bs4 import BeautifulSoup

x = """<marc:record>
<marc:leader>00001cabaa2200000 a 4500</marc:leader>
<marc:datafield tag="035" ind1=" " ind2=" ">
<marc:subfield code="a">(kwc)7350</marc:subfield>
</marc:datafield>
<marc:datafield tag="050" ind1=" " ind2="4">
<marc:subfield code="a">BL</marc:subfield>
</marc:datafield>
<marc:datafield tag="082" ind1=" " ind2=" ">
<marc:subfield code="a">200</marc:subfield>
<marc:subfield code="a">220</marc:subfield>
<marc:subfield code="a">274</marc:subfield>
<marc:subfield code="a">298</marc:subfield>
</marc:datafield>
<marc:datafield tag="100" ind1="1" ind2=" ">
<marc:subfield code="a">الزحيلي، وهبة،</marc:subfield>
</marc:datafield>
<marc:datafield tag="245" ind1="2" ind2="0">
<marc:subfield code="a">الجزاء و العقوبة في الإسلام /</marc:subfield>
<marc:subfield code="c">وهبة الزحيلي.</marc:subfield>
</marc:datafield>
</marc:record>"""

y = BeautifulSoup(x, "html5lib")

# Keyword arguments to find() filter on ATTRIBUTE values, not tag names:
# find(tag="245") is the first element whose tag="..." attribute is "245",
# and the nested find(code="a") is the first subfield with code="a" inside it.
title = y.find(tag="245").find(code="a").get_text()
author = y.find(tag="100").find(code="a").get_text()

# Field 082 holds several code="a" subfields; look the datafield up once and
# show two equivalent ways of taking only the first subfield.
dewey_field = y.find(tag="082")
Dewey = dewey_field.find(code="a").get_text()
# find_all(..., limit=1) stops after one match, so [0] is the same element
# that find() returned above.
Dewey1 = dewey_field.find_all(code="a", limit=1)[0].get_text()

# Single-argument print(...) works on both Python 2 and Python 3; the bare
# `print title` statement form is a SyntaxError on Python 3.
print(title)
print(author)
print(Dewey)
print(Dewey1)
For more information, please visit the Beautiful Soup documentation:
https://www.crummy.com/software/BeautifulSoup/bs4/doc/
No comments:
Post a Comment