Saturday, October 17, 2009

Python and ODBC

Microsoft Office AccessImage via Wikipedia
For anyone following this blog, you know that I've been learning Python. I've been researching the method of writing scripts for reading and interacting with relational databases in general and with Microsoft Access in particular. I decided to try out a project in both Python and VB.Net.  The project is actually really simple. Given two databases, compare the table from one of them with the other to see if there is missing data. In my example I have a reference database containing the list of  tree species to compare to a table of inventory tree information. The applications however are generalized. You can use them with any two Access Databases.

Given the paths of each database, the names of each tables, the fields for comparison, and the path for the final output file, each application will work return a list of species found in the main table that are not in the reference table to the screen and to the output file. I'm going to post each program, a link to download it.

Here are my inputs that I am using:

Reference Database: Personal_FSVEG.mdb
Reference Table: fs_nris_fsveg_NRV_R5_SPECIES_XWALK
Reference Field: NRIS_SPEC

InputDatase: mnfplus07_1inv.mdb
Input Table: FSVEG_TXPT_V
Input Field: SPEC

Output Files
Missing_Species_VB.txt
Missing_Species_Python_adodb.txt
Missing_Species_Python_Win32com.txt


Accessing the database in VB.Net through ADO.Net

It did not take me long to write this but, because it so easy I gave it a GUI.

Here is the code
Imports System.IO
Public Class frmMainForm
    Dim whatfile, var, whatpath, pathvar As String
    Dim U As Integer
    Dim DS As DataSet
    Dim projectfile As String

    Private Sub Button1_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles frmMain.Click

        Dim var As Object, i As Integer, ConnectionString As String, q As String
        Dim adapter As New OleDb.OleDbDataAdapter

        Me.OpenFileDialog1.InitialDirectory = "C:\"
        Me.OpenFileDialog1.Filter = "Access Database (*.mdb)|*.MDB"
        Me.OpenFileDialog1.ShowDialog()
        whatfile = Me.OpenFileDialog1.FileName
        If whatfile <> vbNullString Then

            var = Split(whatfile, "\", -1)
            U = 0
            For Each q In var
                U = U + 1
            Next q

            For i = 0 To U - 2
                whatpath = whatpath & var(i) & "\"
            Next i

        End If

        Me.txtinputfile.Text = whatfile
        pathvar = whatpath

        DS = New Data.DataSet
        DS.Clear()
        ConnectionString = "Provider=Microsoft.Jet.OLEDB.4.0;Data Source=" & Me.txtinputfile.Text
        Dim conn As OleDb.OleDbConnection = New OleDb.OleDbConnection(ConnectionString)
        conn.Open()
        Dim dt As Data.DataTable = conn.GetOleDbSchemaTable(OleDb.OleDbSchemaGuid.Tables, _
                      New Object() {Nothing, Nothing, Nothing, "TABLE"})
        'List the table name from each row in the schema table.
        For i = 0 To dt.Rows.Count - 1
            Me.cboCompareTable.Items.Add(dt.Rows(i)!TABLE_NAME.ToString)
        Next i

        'Explicitly close - don't wait on garbage collection.
        conn.Close()

    End Sub

    Private Sub cmdRun_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles cmdRun.Click

        Dim adapterTXPT As New OleDb.OleDbDataAdapter
        Dim ConnectionString2 As String = "Provider=Microsoft.Jet.OLEDB.4.0;Data Source=" & Me.txtinputfile.Text

        Dim adapter As New OleDb.OleDbDataAdapter
        Dim ConnectionString As String = "Provider=Microsoft.Jet.OLEDB.4.0;Data Source=" & Me.txtReferenceFile.Text

        adapterTXPT = New OleDb.OleDbDataAdapter("SELECT DISTINCT " & Me.cboCompaeField.Text & " FROM " & Me.cboCompareTable.Text, ConnectionString2)
        ' Code to modify data in DataSet here

        Try
            adapterTXPT.MissingSchemaAction = MissingSchemaAction.AddWithKey
            adapterTXPT.Fill(DS, "CompareTable")
        Catch
            MsgBox("Error is: " & Err.Number & " " & Err.Description)
        End Try

        ConnectionString = "Provider=Microsoft.Jet.OLEDB.4.0;Data Source=" & Me.txtReferenceFile.Text
        adapter = New OleDb.OleDbDataAdapter("SELECT DISTINCT " & Me.cboReferenceField.Text & " FROM " & Me.cboReferenceTable.Text, ConnectionString)
        ' Code to modify data in DataSet here

        Try
            adapterTXPT.MissingSchemaAction = MissingSchemaAction.AddWithKey
            adapterTXPT.Fill(DS, "CompareTable")
        Catch
            MsgBox("Error is: " & Err.Number & " " & Err.Description)
        End Try

        Try
            adapter.MissingSchemaAction = MissingSchemaAction.AddWithKey
            adapter.Fill(DS, "ReferenceTable")
        Catch
            MsgBox("Error is: " & Err.Number & " " & Err.Description)
        End Try

        If File.Exists(projectfile) Then
            File.Delete(projectfile)
        End If

        Dim writenow As Boolean = True
        Dim MyFile As IO.StreamWriter
        MyFile = New System.IO.StreamWriter(projectfile, True) ' True for appending

        MyFile.WriteLine("Missing Species found in the TXPT")

        Dim dtTXPT As DataTable
        dtTXPT = DS.Tables("CompareTable")
        Dim dtREF As DataTable
        dtREF = DS.Tables("ReferenceTable")

        Dim dr As DataRow, SPEC As String, drref As DataRow
        writenow = True
        For Each dr In dtTXPT.Rows
            SPEC = dr(Me.cboCompaeField.Text)
            For Each drref In dtREF.Rows
                If SPEC = drref(Me.cboReferenceField.Text) Then
                    writenow = False
                    Exit For
                End If
            Next

            If writenow Then
                lstTXPT.Items.Add(SPEC)
                MyFile.WriteLine(SPEC) ' Write text to file

            End If

            writenow = True

        Next

        MyFile.Close() ' Close file 
        MsgBox("Done")

    End Sub

    Private Sub cmdQuit_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles cmdQuit.Click
        End
    End Sub

    Private Sub cmdSave_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles cmdSave.Click
        Me.FolderBrowserDialog1.Reset()
        Me.FolderBrowserDialog1.ShowDialog()
        pathvar = Me.FolderBrowserDialog1.SelectedPath.ToString
        Dim projectname As String = InputBox("Type the name you want to give the report file:", "Choose name for Report")
        projectfile = pathvar & "\" & projectname
        Me.txtoutput.Text = projectfile
    End Sub

    Private Sub frmMainForm_Load(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles MyBase.Load

    End Sub

    Private Sub cmdBrowseRefernceFile_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles cmdBrowseRefernceFile.Click
        Dim var As Object, i As Integer, ConnectionString As String, q As String
        Dim adapter As New OleDb.OleDbDataAdapter
        Dim adapterTXPT As New OleDb.OleDbDataAdapter
        Dim adapterVXPT As New OleDb.OleDbDataAdapter

        Me.OpenFileDialog1.InitialDirectory = "C:\"
        Me.OpenFileDialog1.Filter = "Access Database (*.mdb)|*.MDB"
        Me.OpenFileDialog1.ShowDialog()
        whatfile = Me.OpenFileDialog1.FileName
        If whatfile <> vbNullString Then

            var = Split(whatfile, "\", -1)
            U = 0
            For Each q In var
                U = U + 1
            Next q

            For i = 0 To U - 2
                whatpath = whatpath & var(i) & "\"
            Next i

        End If

        Me.txtReferenceFile.Text = whatfile
        pathvar = whatpath

        DS = New Data.DataSet
        DS.Clear()
        ConnectionString = "Provider=Microsoft.Jet.OLEDB.4.0;Data Source=" & Me.txtReferenceFile.Text
        Dim conn As OleDb.OleDbConnection = New OleDb.OleDbConnection(ConnectionString)
        conn.Open()
        Dim dt As Data.DataTable = conn.GetOleDbSchemaTable(OleDb.OleDbSchemaGuid.Tables, _
                      New Object() {Nothing, Nothing, Nothing, "TABLE"})
        'List the table name from each row in the schema table.
        For i = 0 To dt.Rows.Count - 1
            Me.cboReferenceTable.Items.Add(dt.Rows(i)!TABLE_NAME.ToString)
        Next i

        'Explicitly close - don't wait on garbage collection.
        conn.Close()
    End Sub

    Private Sub cboCompareTable_SelectedValueChanged(ByVal sender As Object, ByVal e As System.EventArgs) Handles cboCompareTable.SelectedValueChanged

        Dim Connectionstring As String = "Provider=Microsoft.Jet.OLEDB.4.0;Data Source=" & Me.txtinputfile.Text
        Dim adapter As OleDb.OleDbDataAdapter = New OleDb.OleDbDataAdapter("SELECT * FROM " & Me.cboCompareTable.Text, Connectionstring)

        Try
            adapter.MissingSchemaAction = MissingSchemaAction.AddWithKey
            adapter.Fill(DS, Me.cboCompareTable.Text)
        Catch
            MsgBox("Error is: " & Err.Number & " " & Err.Description)
        End Try

        Dim fld As DataColumn, dr As DataRow
        dr = DS.Tables(Me.cboCompareTable.Text).Rows(0)
        For Each fld In dr.Table.Columns
            Me.cboCompaeField.Items.Add(fld.ColumnName)
        Next
    End Sub

    Private Sub cboReferenceTable_SelectedValueChanged(ByVal sender As Object, ByVal e As System.EventArgs) Handles cboReferenceTable.SelectedValueChanged

        Dim Connectionstring As String = "Provider=Microsoft.Jet.OLEDB.4.0;Data Source=" & Me.txtReferenceFile.Text
        Dim adapter As OleDb.OleDbDataAdapter = New OleDb.OleDbDataAdapter("SELECT * FROM " & Me.cboReferenceTable.Text, Connectionstring)

        Try
            adapter.MissingSchemaAction = MissingSchemaAction.AddWithKey
            adapter.Fill(DS, Me.cboReferenceTable.Text)
        Catch
            MsgBox("Error is: " & Err.Number & " " & Err.Description)
        End Try

        Dim fld As DataColumn, dr As DataRow
        dr = DS.Tables(Me.cboReferenceTable.Text).Rows(0)
        For Each fld In dr.Table.Columns
            Me.cboReferenceField.Items.Add(fld.ColumnName)
        Next

    End Sub

    Private Sub cmdClear_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles cmdClear.Click
        With Me
            .txtinputfile.Text = ""
            .txtoutput.Text = ""
            .txtReferenceFile.Text = ""

            Dim t As Integer, i As Integer
            i = .lstTXPT.Items.Count
            For t = 1 To i
                .lstTXPT.Items.RemoveAt(0)
            Next

            i = .cboCompareTable.Items.Count
            For t = 1 To i
                .cboCompareTable.Items.RemoveAt(0)
            Next

            i = .cboReferenceField.Items.Count
            For t = 1 To i
                .cboReferenceField.Items.RemoveAt(0)
            Next

            i = .cboReferenceTable.Items.Count
            For t = 1 To i
                .cboReferenceTable.Items.RemoveAt(0)
            Next

            i = .cboCompaeField.Items.Count
            For t = 1 To i
                .cboCompaeField.Items.RemoveAt(0)
            Next

        End With
    End Sub
End Class


Here is the VB.Net version in action






I like doing this kind of work in VB because GUIs are so easy and fast to put together.


Accessing the database in Python

Fortunately, there was plenty of information about how to achieve the same effect in Python. I found two ways of doing it. Neither way offers a GUI but instead a user interactively enters inputs using commandline.

Accessing the database in Python through ADOdb/mxODBC

In scouring the Internet, I found the following library for Python. ADOdb depends on the mxODBC library. The information for how to get them can be found here and there. As I stated in a previous post, the mxODBC library is not free but you can get a 30-day license in which to to test. It is also important to remember that you must install these libraries to get the following script to work.

Here is the Python code using the ADOdb/mxODBC libraries
'''
Created on Oct 13, 2009

@author: marcusmcelhaney
'''
#This script will be designed to compare a given inventory file versus a species reference table
#This is to test ODBC connection to Access Databases
#Results will be printed out on screen and in a file.
#Uses Python 2.5

#Used the technique from http://phplens.com/lens/adodb/adodb-py-docs.htm#install


#Start connections to Access database
# ODBC for Access database containing the Reference table
import adodb
import os.path

lstTXPT = []

#get the location of the reference file
reffile = input('Enter the file path and name for reference file: ')
inputfile2 = os.path.split(reffile)
reffile  = inputfile2[0] + "\\" + inputfile2[1]
#reffile = "C:/Temp/Personal_FSVEG.mdb"

#get Reference SQL Statement
reffield = input("Enter field to get Records from the reference table: ")
#reffield="NRIS_SPEC"

#Get Reference table
refTable = input("Enter Reference Table: ")
#refTable = "fs_nris_fsveg_NRV_R5_SPECIES_XWALK"

conn0 = adodb.NewADOConnection('access') # mxodbc required
dsn = "Driver={Microsoft Access Driver (*.mdb)};Dbq=" + reffile + ";"
conn0.Connect(dsn)
cursor = conn0.Execute('select distinct ' + reffield + " from " + refTable)


#get the location of the inventory file
inputfile = input('Enter the file path and name for inventory: ')
inputfile2 = os.path.split(inputfile)
inputfile  = inputfile2[0] + "\\" + inputfile2[1]

#inputfile = inputfile.replace("\", "/",5)

#Get compare field
comparefield = input("Enter the field to compare in the desired table: ")
#comparefield = "SPEC"

#Get compare table
CompareTable = input("Enter the name of the table you are comparing: ")
#CompareTable = "FSVEG_TXPT_V"

out_file = open("c:/Temp/Missing_Species_Python_adodb.txt", "w")

#Make connection string for inventory file
conn2 = adodb.NewADOConnection('access') # mxodbc required
dsn2 = "Driver={Microsoft Access Driver (*.mdb)};Dbq=" + inputfile + ";" 
conn2.Connect(dsn2)
cursorTXPT = conn2.Execute('select distinct ' + comparefield + ' from ' + CompareTable)

print "Getting list of comparefield not in Compare Table"
writenow = True
cursorTXPT.MoveNext()
while not cursorTXPT.EOF:
    DesiredSPEC = cursorTXPT.fields[0]
    
    while not cursor.EOF:
        if DesiredSPEC == cursor.fields[0]:
            writenow = False
            cursor = conn0.Execute('select distinct ' + reffield + " from " + refTable)
            break
        cursor.MoveNext()
      
    if writenow:       
        lstTXPT.append(DesiredSPEC)
        out_file.write(DesiredSPEC)
        cursor = conn0.Execute('select distinct ' + reffield + " from " + refTable)
        
    writenow = True
    cursorTXPT.MoveNext()


cursorTXPT.Close()    
cursor.Close()
conn2.Close()
conn0.Close() 

print "Missing Species from the TXPT"
for species in lstTXPT:
    print species
    
out_file.close()
print "Done"  

Accessing the database in Python through Win32com

Here is the other method I found for connecting a python script into an Access database. I think this will be my method of choice because it's library is free and the syntax and use closely mirrors that of ADO classic which I am very familiar with. Here is the code:

'''
Created on Oct 15, 2009

@author: marcusmcelhaney
'''
#This script will be designed to compare a given inventory file versus a species reference table
#This is to test ODBC connection to Access Databases
#Results will be printed out on screen and in a file.
#Uses Python 2.5

#Used the technique Win32com


#Start connections to Access database
# ODBC for Access database containing the Reference table
import win32com.client                        
import os.path

lstTXPT = []

#get the location of the reference file
reffile = input('Enter the file path and name for reference file: ')
inputfile2 = os.path.split(reffile)
reffile  = inputfile2[0] + "\\" + inputfile2[1]
#reffile = "C:/Temp/Personal_FSVEG.mdb"

#get Reference SQL Statement
reffield = input("Enter field to get Records from the reference table: ")
#reffield="NRIS_SPEC"

#Get Reference table
refTable = input("Enter Reference Table: ")
#refTable = "fs_nris_fsveg_NRV_R5_SPECIES_XWALK"

connexion = win32com.client.gencache.EnsureDispatch('ADODB.Connection')            
dsn = "Driver={Microsoft Access Driver (*.mdb)};Dbq=" + reffile + ";"
connexion.Open(dsn)

AD_OPEN_KEYSET = 1
AD_LOCK_OPTIMISTIC = 3

rs = win32com.client.Dispatch(r'ADODB.Recordset')
rs.CursorLocation = 3 

offset = 1            
    
rs.Open('select distinct ' + reffield + " from " + refTable, connexion, AD_OPEN_KEYSET, AD_LOCK_OPTIMISTIC)

#get the location of the inventory file
inputfile = input('Enter the file path and name for inventory: ')
inputfile2 = os.path.split(inputfile)
inputfile  = inputfile2[0] + "\\" + inputfile2[1]

#inputfile = inputfile.replace("\", "/",5)

#Get compare field
#comparefield = input("Enter the field to compare in the desired table: )
comparefield = "SPEC"

#Get compare table
CompareTable = input("Enter the name of the table you are comparing: ")
#CompareTable = "FSVEG_TXPT_V"

out_file = open("c:/Temp/Missing_Species_Python_win32com.txt", "w")

#Make connection string for inventory file
connexion2 = win32com.client.gencache.EnsureDispatch('ADODB.Connection')            
dsn2 = "Driver={Microsoft Access Driver (*.mdb)};Dbq=" + inputfile + ";" 
connexion2.Open(dsn2)

AD_OPEN_KEYSET = 1
AD_LOCK_OPTIMISTIC = 3

rsTXPT = win32com.client.Dispatch(r'ADODB.Recordset')
rsTXPT.CursorLocation = 3 

offset = 1            
    
rsTXPT.Open('select distinct ' + comparefield + ' from ' + CompareTable, connexion2, AD_OPEN_KEYSET, AD_LOCK_OPTIMISTIC)

print "Getting list of comparefield not in Compare Table"
writenow = True

# expand the recordset as list of tuples
while not rsTXPT.EOF: 
    rsTXPT.MoveNext
    DesiredSPEC = rsTXPT.Fields.Item(comparefield).Value
    rs.MoveNext
    while not rs.EOF:
        
        if DesiredSPEC == rs.Fields.Item(reffield).Value:
            writenow = False
            rs.Close()
            rs.Open('select distinct ' + reffield + " from " + refTable, connexion, AD_OPEN_KEYSET, AD_LOCK_OPTIMISTIC)
            break
        rs.MoveNext()
      
    if writenow:       
        lstTXPT.append(DesiredSPEC)
        out_file.write(DesiredSPEC)
        rs.Close()
        rs.Open('select distinct ' + reffield + " from " + refTable, connexion, AD_OPEN_KEYSET, AD_LOCK_OPTIMISTIC)
        
    writenow = True
    rsTXPT.MoveNext()


rsTXPT.Close()    
rs.Close()
connexion2.Close()
connexion.Close() 

print "Missing Species from the TXPT"
for species in lstTXPT:
    print species
    
out_file.close()
print "Done" 

You can down load the source code below


Download these
Name
Purpose
Link
VB.Net version
Software installer
Setup1
Setup1.msi
Hosted by eSnips
VB.Net Source code
Project files
SpeciesMatch
SpeciesMatch.zip
Hosted by eSnips
Python ADOdb Source code
Project files
TestSpeciesCode_adodb
TestSpeciesCode_ad...
Hosted by eSnips
Python Win32com Source code
Project files
TestSpeciesCode_Win32Com
TestSpeciesCode_Wi...
Hosted by eSnips
Personal_FSVEG.mdb
Reference database
Personal_FSVEG
Personal_FSVEG.mdb
Hosted by eSnips
Comparing Inventory
Test database
Personal_FSVEG
Personal_FSVEG.mdb
Hosted by eSnips

No comments: