Create complete land plant taxonomy DB for all genera

In [2]:
import sqlite3
In [5]:
taxfile = open('/Users/hughcross/Analysis/CAMEL/cam2018/new_taxonomy_finder/land_plant_genera_APNI_taxonomy.txt')
In [6]:
taxaDict = {}
genusList = []
for line in taxfile:
    if line.startswith('superkingdom'):
        continue
    else:
        
        line = line.strip('\n')
        parts = line.split('\t')
        genus = parts[11]
        genusList.append(genus)
        taxaDict.setdefault(genus, {})['regnum']='Plantae'
        taxaDict.setdefault(genus, {})['division']='Charophyta'
        taxaDict.setdefault(genus, {})['class']='Equisetopsida'
        taxaDict.setdefault(genus, {})['subclass']=parts[5]
        taxaDict.setdefault(genus, {})['superorder']=parts[6]
        taxaDict.setdefault(genus, {})['order']=parts[7]
        taxaDict.setdefault(genus, {})['family']=parts[8]
        taxaDict.setdefault(genus, {})['subfamily']=parts[9]
        taxaDict.setdefault(genus, {})['tribe']=parts[10]
print(len(genusList))
print(len(taxaDict))
taxfile.close()
15082
15082

Create database and all of dictionary

In [7]:
# create db ## note 
sqlite_file = '/Users/hughcross/Analysis/CAMEL/cam2018/taxonomyDB/Land_Plant_Taxonomy_Reference_db.sqlite'
table1 = 'land_plant_genera_APNI_taxonomy'
taxon = 'Taxon'
field_type = 'TEXT'
Genus = 'Genus'
tax_id = 'Tax_ID'
integer = 'INTEGER'
In [19]:
apg_taxonomy = ['regnum', 'division', 'class', 'subclass', 'superorder', 'order', 'family',
               'subfamily', 'tribe', 'genus']
In [8]:
# connecting to db file
conn = sqlite3.connect(sqlite_file)
c = conn.cursor()
In [21]:
gen = 'Gen'
In [22]:
# remove table to redo
c.execute('DROP TABLE land_plant_genera_APNI_taxonomy')
conn.commit()
In [23]:
c.execute('CREATE TABLE {tn} ({nf} {ft} PRIMARY KEY)'\
         .format(tn=table1, nf=gen, ft=field_type))
conn.commit()
In [24]:
# now add fields
for apg in apg_taxonomy:
    c.execute("ALTER TABLE {tn} ADD COLUMN '{cn}' {ct}"\
             .format(tn=table1, cn=apg, ct=field_type))
conn.commit()
In [46]:
for k,v in taxaDict.items():
    genus = k
    #g = [(gen)]
    taxset = (k,v['regnum'],v['division'],v['class'],v['subclass'],v['superorder'],v['order'],v['family'],v['subfamily'],v['tribe'],genus)
    c.execute("INSERT OR IGNORE INTO 'land_plant_genera_APNI_taxonomy' VALUES(?,?,?,?,?,?,?,?,?,?,?)", taxset)


conn.commit()