lundi 2 février 2015

Storing a graph in SQLite - Relational or Flat?

The Problem


I need to store a directed graph in an sqlite database and I'm wondering how to solve this.


My first solution was to create two tables, one for the nodes and one for the edges, however I'm unsatisfied with the insertion performance compared to a non-relational model of the problem.


Given a typical graph of 100k nodes, each having 50 outgoing edges, inserting into a relational model takes ~20s vs 7s using a flat approach.


Actual question



  • Is there anything I can to do speed up the process of inserting the graph into a database with the relational model?

  • Am I using the SQLite API in a suboptimal fashion?

  • Should I even consider a relational approach, given that it's not required, only "nice"?


Additional information


I've written two samples to demonstrate my problem:


Relational Approach



public sealed class RelationalGraphDatabase
: IDatabase
{
private readonly SQLiteConnection _connection;

public RelationalGraphDatabase()
{
const string fname = @"E:\relational_graph.db";
if (File.Exists(fname))
File.Delete(fname);

var conn = new SQLiteConnectionStringBuilder
{
DataSource = fname
};
_connection = new SQLiteConnection(conn.ToString());
_connection.Open();
using (
var command = new SQLiteCommand("CREATE TABLE Node (Id INTEGER PRIMARY KEY, Name TEXT)", _connection))
{
command.ExecuteNonQuery();
}

using (
var command =
new SQLiteCommand(
"CREATE TABLE Edges (Node_From INTEGER NOT NULL, Node_To INTEGER, P1 INTEGER NOT NULL, P2 INTEGER NOT NULL, P3 INTEGER NOT NULL, P4 INTEGER NOT NULL)",
_connection))
{
command.ExecuteNonQuery();
}
}

public void Commit(IEnumerable<Node> values)
{
using (SQLiteTransaction transaction = _connection.BeginTransaction())
{
using (var command = new SQLiteCommand("INSERT INTO Node (Id, Name) VALUES (@Id, @Name)", _connection))
{
command.Parameters.Add("@Id", DbType.Int64);
command.Parameters.Add("@Name", DbType.String);

foreach (Node bts in values)
{
command.Parameters[0].Value = bts.Id;
command.Parameters[1].Value = bts.Name;
command.ExecuteNonQuery();
}
}

using (
var command =
new SQLiteCommand("INSERT INTO Edges (Node_From, Node_To, P1, P2, P3, P4) VALUES (@From, @To, @P1, @P2, @P3, @P4)",
_connection))
{
command.Parameters.Add("@From", DbType.Int64);
command.Parameters.Add("@To", DbType.Int64);
command.Parameters.Add("@P1", DbType.UInt16);
command.Parameters.Add("@P2", DbType.UInt16);
command.Parameters.Add("@P3", DbType.UInt16);
command.Parameters.Add("@P4", DbType.UInt16);

foreach (Node bts in values)
{
foreach (NodeId neighbour in bts.Edges)
{
command.Parameters[0].Value = (long) bts.Id;
command.Parameters[1].Value = null;
command.Parameters[2].Value = neighbour.P1;
command.Parameters[3].Value = neighbour.P2;
command.Parameters[4].Value = neighbour.P3;
command.Parameters[5].Value = neighbour.P4;
command.ExecuteNonQuery();
}
}
}

transaction.Commit();
}
}

public IEnumerable<Node> Select()
{
var ret = new Dictionary<uint, Node>();
using (var command = new SQLiteCommand("SELECT * FROM Node", _connection))
{
using (SQLiteDataReader reader = command.ExecuteReader())
{
while (reader.Read())
{
var bts = new Node
{
Id = (uint) reader.GetInt64(0),
Name = reader.GetString(1),
Edges = new List<NodeId>()
};
ret.Add(bts.Id, bts);
}
}
}

string sql = string.Format("SELECT * FROM Edges WHERE Node_From IN ({0})", string.Join(", ", ret.Keys));
using (var command = new SQLiteCommand(sql, _connection))
{
using (SQLiteDataReader reader = command.ExecuteReader())
{
while (reader.Read())
{
var id = (uint) reader.GetInt64(0);
int mcc = reader.GetInt32(2);
int mnc = reader.GetInt32(3);
int lac = reader.GetInt32(4);
int ci = reader.GetInt32(5);
ret[id].Edges.Add(new NodeId
{
P1 = (ushort) mcc,
P2 = (ushort) mnc,
P3 = (ushort) lac,
P4 = (ushort) ci
});
}
}
}

return ret.Values.ToList();
}

public void Dispose()
{
_connection.Dispose();
}
}


Flat Approach



public sealed class FlatGraphDatabase
: IDatabase
{
private readonly SQLiteConnection _connection;

public FlatGraphDatabase()
{
const string fname = @"E:\flat_graph.db";
if (File.Exists(fname))
File.Delete(fname);

var conn = new SQLiteConnectionStringBuilder
{
DataSource = fname
};
_connection = new SQLiteConnection(conn.ToString());
_connection.Open();
using (
var command = new SQLiteCommand("CREATE TABLE Node (Id INTEGER PRIMARY KEY, Name TEXT, Edges TEXT)", _connection))
{
command.ExecuteNonQuery();
}
}

public void Commit(IEnumerable<Node> values)
{
using (var transaction = _connection.BeginTransaction())
using (var command = new SQLiteCommand("INSERT INTO Node (Id, Name, Edges) VALUES (@Id, @Name, @Edges)", _connection))
{
command.Parameters.Add("@Id", DbType.Int64);
command.Parameters.Add("@Name", DbType.String);
command.Parameters.Add("@Edges", DbType.String);

foreach (var bts in values)
{
command.Parameters[0].Value = bts.Id;
command.Parameters[1].Value = bts.Name;
command.Parameters[2].Value = string.Join(",", bts.Edges);
command.ExecuteNonQuery();
}

transaction.Commit();
}
}

public IEnumerable<Node> Select()
{
var ret = new List<Node>();
using (var command = new SQLiteCommand("SELECT * FROM Node", _connection))
{
using (var reader = command.ExecuteReader())
{
while (reader.Read())
{
var bts = new Node
{
Id = (uint) reader.GetInt64(0),
Name = reader.GetString(1),
Edges = reader.GetString(2).Split(',').Select(NodeId.Parse).ToList()
};
ret.Add(bts);
}
}
}
return ret;
}

public void Dispose()
{
_connection.Dispose();
}
}


Thanks for your time in advance :)


Aucun commentaire:

Enregistrer un commentaire