Sunday, October 11, 2020

Validate URLs using Python

 

We have two DB tables

files Table with two columns (id, URL)
Validation Table with two columns (id, Status)

The Python application loops through the "files" DB table, checks whether each URL exists (i.e., responds to an HTTP GET request), and records the validation status in the "Validation" table.


 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# -*- coding: utf-8 -*-
#!/usr/bin/python
from __future__ import print_function
import MySQLdb    #sudo apt-get install python-mysqldb
import sys
import re
import requests

# Python 2 only: reload(sys) makes sys.setdefaultencoding available again so
# that UTF-8 becomes the default codec for implicit str/unicode conversions.
reload(sys)
sys.setdefaultencoding('utf8')

# Connection settings -- replace the placeholder values with real ones.
connection_settings = dict(
    host="IP",            # database server, usually localhost
    user="user",          # account name
    passwd="Password",    # account password
    db="DB",              # database holding the `files` and `Validation` tables
    charset='utf8',
)
my_db = MySQLdb.connect(**connection_settings)

# Commit each statement as soon as it is executed.
my_db.autocommit(True)

# DictCursor yields each row as a dict keyed by column name.
my_cur = my_db.cursor(MySQLdb.cursors.DictCursor)
my_cur.execute("SET session group_concat_max_len=30000;")

# Select every row of `files` whose url starts with "http", ordered by id.
select_urls_sql = """
                    SELECT id,url FROM `files` where url like 'http%' order by id
                   """
my_cur.execute(select_urls_sql)

# Walk over every selected row, request its URL and store the HTTP status
# code in the `Validation` table, printing one Success/Error line per row.
try:
    url_rows = my_cur.fetchall()
    for row in url_rows:
        try:
            # A timeout keeps one unresponsive server from stalling the run.
            r = requests.get(str(row["url"]), timeout=30)
        except requests.exceptions.RequestException as e:
            # Unreachable/invalid URL: report it and move on to the next row
            # instead of aborting the whole run. Nothing is written to
            # `Validation` because there is no HTTP status code to record.
            print('Error: ' + str(row["id"]) + ' (' + str(e) + ')')
            continue

        # Parameterized query: the driver does the quoting/escaping.
        my_cur.execute(
            "insert into `Validation` (id,Status) values (%s, %s)",
            (row["id"], str(r.status_code))
        )

        # The SELECT returns only `id` and `url`, so report the row by `id`.
        if r.status_code != 200:
            print('Error: ' + str(row["id"]))
        else:
            print('Success: ' + str(row["id"]))

except Exception as e:
    # Top-level boundary: report unexpected failures (e.g. database errors).
    print("Fail!")
    print(str(e))

finally:
    # Always release the cursor and the connection, even on interruption.
    my_cur.close()
    my_db.close()

print("============= END ====================")

No comments: