import sqlite3
taxfile = open('/Users/hughcross/Analysis/CAMEL/cam2018/new_taxonomy_finder/land_plant_genera_APNI_taxonomy.txt')
taxaDict = {}
genusList = []
for line in taxfile:
if line.startswith('superkingdom'):
continue
else:
line = line.strip('\n')
parts = line.split('\t')
genus = parts[11]
genusList.append(genus)
taxaDict.setdefault(genus, {})['regnum']='Plantae'
taxaDict.setdefault(genus, {})['division']='Charophyta'
taxaDict.setdefault(genus, {})['class']='Equisetopsida'
taxaDict.setdefault(genus, {})['subclass']=parts[5]
taxaDict.setdefault(genus, {})['superorder']=parts[6]
taxaDict.setdefault(genus, {})['order']=parts[7]
taxaDict.setdefault(genus, {})['family']=parts[8]
taxaDict.setdefault(genus, {})['subfamily']=parts[9]
taxaDict.setdefault(genus, {})['tribe']=parts[10]
print(len(genusList))
print(len(taxaDict))
taxfile.close()
# create db ## note
sqlite_file = '/Users/hughcross/Analysis/CAMEL/cam2018/taxonomyDB/Land_Plant_Taxonomy_Reference_db.sqlite'
table1 = 'land_plant_genera_APNI_taxonomy'
taxon = 'Taxon'
field_type = 'TEXT'
Genus = 'Genus'
tax_id = 'Tax_ID'
integer = 'INTEGER'
apg_taxonomy = ['regnum', 'division', 'class', 'subclass', 'superorder', 'order', 'family',
'subfamily', 'tribe', 'genus']
# connecting to db file
conn = sqlite3.connect(sqlite_file)
c = conn.cursor()
gen = 'Gen'
# remove table to redo
c.execute('DROP TABLE land_plant_genera_APNI_taxonomy')
conn.commit()
c.execute('CREATE TABLE {tn} ({nf} {ft} PRIMARY KEY)'\
.format(tn=table1, nf=gen, ft=field_type))
conn.commit()
# now add fields
for apg in apg_taxonomy:
c.execute("ALTER TABLE {tn} ADD COLUMN '{cn}' {ct}"\
.format(tn=table1, cn=apg, ct=field_type))
conn.commit()
for k,v in taxaDict.items():
genus = k
#g = [(gen)]
taxset = (k,v['regnum'],v['division'],v['class'],v['subclass'],v['superorder'],v['order'],v['family'],v['subfamily'],v['tribe'],genus)
c.execute("INSERT OR IGNORE INTO 'land_plant_genera_APNI_taxonomy' VALUES(?,?,?,?,?,?,?,?,?,?,?)", taxset)
conn.commit()