Install the Python dependencies (BeautifulSoup 4 and the MySQL bindings):
sudo apt-get install python-bs4
sudo apt-get install python-mysqldb
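A quick way to confirm both packages are importable before going further (a minimal sketch, nothing Koha-specific):

# Sanity check: both modules should import and report a version.
import bs4
import MySQLdb

print "bs4:", bs4.__version__
print "MySQLdb:", MySQLdb.__version__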
Extract MARC data and insert it into a new table
1) Create the following table:
DROP TABLE IF EXISTS `newmarcrecords`;
CREATE TABLE `NewMarcRecords` (
`BibID` VARCHAR(30) NOT NULL ,
`Leader` VARCHAR(30) NULL ,
`Title` VARCHAR(500) NOT NULL ,
`Auther` VARCHAR(500) NULL ,
`Publisher` VARCHAR(500) NULL ,
`PublishYear` VARCHAR(500) NULL ,
`PublishLocation` VARCHAR(500) NULL ,
`Subject` VARCHAR(500) NULL ,
`Classification` VARCHAR(500) NULL ,
`RecordSource` VARCHAR(500) NULL ,
`Pages` VARCHAR(500) NULL ,
`URL` VARCHAR(500) NULL ,
`CoverURL` VARCHAR(500) NULL ,
`Price` VARCHAR(30) NULL ,
`RecordStatus` VARCHAR(1) NULL ,
PRIMARY KEY (`BibID`)) DEFAULT CHARSET=utf8 COLLATE=utf8_general_ci ENGINE = InnoDB;
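The statement can be run from any MySQL client; as a minimal sketch, assuming the same localhost/root/admin123/koha connection settings used in step 2, it can also be executed from Python:

# Minimal sketch: create the NewMarcRecords table from Python instead of
# the mysql client. Adjust the credentials to match your own server.
import MySQLdb

db = MySQLdb.connect(host="localhost", user="root", passwd="admin123",
                     db="koha", charset='utf8')
cur = db.cursor()
cur.execute("DROP TABLE IF EXISTS `newmarcrecords`")
cur.execute("""
    CREATE TABLE `NewMarcRecords` (
      `BibID` VARCHAR(30) NOT NULL,
      `Leader` VARCHAR(30) NULL,
      `Title` VARCHAR(500) NOT NULL,
      `Auther` VARCHAR(500) NULL,
      `Publisher` VARCHAR(500) NULL,
      `PublishYear` VARCHAR(500) NULL,
      `PublishLocation` VARCHAR(500) NULL,
      `Subject` VARCHAR(500) NULL,
      `Classification` VARCHAR(500) NULL,
      `RecordSource` VARCHAR(500) NULL,
      `Pages` VARCHAR(500) NULL,
      `URL` VARCHAR(500) NULL,
      `CoverURL` VARCHAR(500) NULL,
      `Price` VARCHAR(30) NULL,
      `RecordStatus` VARCHAR(1) NULL,
      PRIMARY KEY (`BibID`)
    ) DEFAULT CHARSET=utf8 COLLATE=utf8_general_ci ENGINE=InnoDB
""")
db.close()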
2) Run the following Python script:
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
Read the MARCXML stored in Koha's biblio_metadata table and copy the
extracted bibliographic fields into the NewMarcRecords table created in
step 1 above.
"""
import MySQLdb
from bs4 import BeautifulSoup


def GetValue(record, tag, code):
    """Return the text of subfield `code` in datafield `tag`, or '---' if absent."""
    datafield = record.find(tag=tag)
    if datafield is not None:
        subfield = datafield.find(code=code)
        if subfield is not None:
            return subfield.get_text()
    return '---'


# Connect to the Koha database (adjust host, user, password and database
# name to match your installation).
db = MySQLdb.connect(host="localhost",   # your host, usually localhost
                     user="root",        # your username
                     passwd="admin123",  # your password
                     db="koha",          # name of the database
                     charset='utf8')

# A cursor object lets you execute the queries you need.
cur = db.cursor()

# Fetch only the biblios that have not been copied into NewMarcRecords yet.
cur.execute("""SELECT biblionumber, metadata FROM biblio_metadata
               WHERE CAST(biblionumber AS CHAR) NOT IN
                     (SELECT BibID FROM NewMarcRecords)""")

for row in cur.fetchall():
    biblionumber = row[0]
    xmlMarc = row[1]
    # html5lib lower-cases the MARCXML element names; datafields and
    # subfields are then matched by their tag/code attributes.
    record = BeautifulSoup(xmlMarc, "html5lib")
    Leader = record.find("leader").get_text()
    title = GetValue(record, "245", "a")
    author = GetValue(record, "100", "a")
    Publisher = GetValue(record, "260", "b")        # 260$b: publisher name
    PublishYear = GetValue(record, "260", "c")      # 260$c: date of publication
    PublishLocation = GetValue(record, "260", "a")  # 260$a: place of publication
    Subject = GetValue(record, "650", "a")
    Classification = GetValue(record, "082", "a")
    RecordSource = GetValue(record, "956", "s")
    Pages = GetValue(record, "300", "a")
    URL = GetValue(record, "856", "u")
    CoverURL = GetValue(record, "956", "u")
    Price = GetValue(record, "956", "p")
    RecordStatus = '0'
    try:
        cur.execute("""
            INSERT INTO `newmarcrecords`
                (`BibID`, `Leader`, `Title`, `Auther`, `Publisher`, `PublishYear`,
                 `PublishLocation`, `Subject`, `Classification`, `RecordSource`,
                 `Pages`, `URL`, `CoverURL`, `Price`, `RecordStatus`)
            VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)
            """,
            (biblionumber, Leader, title, author, Publisher, PublishYear,
             PublishLocation, Subject, Classification, RecordSource, Pages,
             URL, CoverURL, Price, RecordStatus))
        db.commit()
        print "success"
    except MySQLdb.Error as e:
        db.rollback()
        print "Fail!"
        print e
        print title
    print "========================="

db.close()
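To spot-check the result afterwards, a minimal sketch like the one below (again assuming the same connection settings as above) counts the copied records and prints a few of them:

# Minimal sketch: verify how many records landed in NewMarcRecords.
import MySQLdb

db = MySQLdb.connect(host="localhost", user="root", passwd="admin123",
                     db="koha", charset='utf8')
cur = db.cursor()
cur.execute("SELECT COUNT(*) FROM NewMarcRecords")
print "records copied:", cur.fetchone()[0]
cur.execute("SELECT BibID, Title FROM NewMarcRecords LIMIT 5")
for bib_id, title in cur.fetchall():
    print bib_id, title.encode('utf-8')
db.close()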