#!/usr/bin/python
"""Convert a CSV log of gas fill-ups into an RDF graph saved as gas.rdf.

The first CSV row is used as column headers. Every following row that has
a non-empty "gallons (+adj)" column becomes one fill-up node linked to a
gas station node and a gas price node.
"""
import csv
from itertools import islice
import optparse
from rdflib.sparql.sparqlGraph import SPARQLGraph as Graph
from rdflib.sparql import GraphPattern
from rdflib import URIRef, Literal, BNode, Namespace
from rdflib import RDF, RDFS
from gasuse import DC, DATE, GAS, DOLLAR, MILE, GALLON
from gasuse import dateFromAmerican


def newURI(store, base):
    """Return an unused URI named base, base1, base2, etc.

    A candidate counts as unused when the store holds no
    (predicate, object) pairs with it as the subject. The bare base is
    tried first, then numeric suffixes starting at 1.
    """
    suffix = ""
    while True:
        uri = URIRef("%s%s" % (base, suffix))
        if not list(store.predicate_objects(uri)):
            return uri
        if suffix == "":
            suffix = 1
        else:
            suffix = suffix + 1


def addOrGetStation(store, shortName, shortAddress):
    """Return the station with this name and address, creating it if absent.

    Raises ValueError when the store already contains more than one
    station matching both shortName and shortAddress.
    """
    results = store.query("?station", GraphPattern([
        ("?station", GAS['shortName'], Literal(shortName)),
        ("?station", GAS['shortAddress'], Literal(shortAddress))]))
    if len(results) > 1:
        raise ValueError("found multiple stations with same name and address")
    if len(results) == 1:
        return results[0]

    # No match: mint a URI from the name with its spaces removed.
    station = newURI(store, GAS['station/' + shortName.replace(' ', '')])
    # NOTE(review): RDFS.Class is used as the typing predicate here; RDF.type
    # is the usual choice. Left unchanged because it alters the emitted data.
    for p, o in [
            (RDFS.Class, GAS['station']),
            (GAS['shortName'], Literal(shortName)),
            (GAS['shortAddress'], Literal(shortAddress)),
            (RDFS.label, Literal("%s at %s" % (shortName, shortAddress))),
    ]:
        store.add((station, p, o))
    return station


def addGasPrice(store, date, dollars, station):
    """Add a blank node recording the per-gallon price at a station on a date.

    Returns the new blank node.
    """
    gasPrice = BNode()
    for p, o in [
            (DC['date'], date),
            (GAS['pricePerGallon'], Literal(dollars, datatype=DOLLAR)),
            (GAS['gasStation'], station),
    ]:
        store.add((gasPrice, p, o))
    return gasPrice


parser = optparse.OptionParser(usage="""%prog input.csv First row will be treated as headers.""")
opts, args = parser.parse_args()
if len(args) < 1:
    parser.error("You need a file")

store = Graph()

# Guess the CSV dialect from the first 10 lines. The handle is closed
# explicitly; try/finally keeps this valid on any Python 2 interpreter.
sampleFile = open(args[0])
try:
    dialect = csv.Sniffer().sniff("\n".join(islice(sampleFile, 10)))
finally:
    sampleFile.close()

csvFile = open(args[0])
try:
    reader = csv.reader(csvFile, dialect)
    headers = reader.next()
    for row in reader:
        # Decode each field with the "latin" codec and key it by header.
        record = dict(zip(headers, [unicode(f, "latin") for f in row]))
        if record["gallons (+adj)"] == "":
            # Rows without a gallons value are not treated as fill-ups.
            continue

        date = dateFromAmerican(record["date"])
        station = addOrGetStation(store, record["station"], record["addr"])
        gasPrice = addGasPrice(store, date, record["price gal"], station)

        fillUp = BNode()
        # NOTE(review): same RDFS.Class-as-predicate pattern as in
        # addOrGetStation; kept as-is to preserve the output.
        for p, o in [
                (RDFS.Class, GAS['fillUp']),
                (DC['date'], date),
                (GAS['notes'], Literal(record["notes"])),
                (GAS['gallons'],
                 Literal(record["gallons (+adj)"], datatype=GALLON)),
                (GAS['fee'], Literal(record["fee"], datatype=DOLLAR)),
                (GAS['tripMeter'],
                 Literal(record["miles (+adjs)"], datatype=MILE)),
                # Fall back to the "miles real" column when "mileage" is empty.
                (GAS['odometer'],
                 Literal(record["mileage"] or record["miles real"],
                         datatype=MILE)),
                (GAS['gasStation'], station),
                (GAS['gasPrice'], gasPrice),
                (GAS['car'], GAS['car/drewHonda']),
        ]:
            # Skip string-valued objects that are empty.
            if isinstance(o, basestring) and o == "":
                continue
            store.add((fillUp, p, o))
finally:
    csvFile.close()

store.save("gas.rdf", format="pretty-xml")