使用 XML 文件模拟聚集索引



>假设我有一个存放在 XML 文件中的表,其中包含(ID、名称、城市、国家/地区)等数据。我需要使用 C# 模拟聚集索引,以证明它能够提高搜索性能。我的任务是在 ID 字段上创建索引。

我对索引文件的了解是:索引中的每一行都指向数据文件中的某个位置;而聚集索引在逻辑上的工作方式与主索引不同。

我尝试搜索聚集索引的资料,了解到它存在于数据库管理系统中,并且会在创建主键时自动创建。我安装了 SSMS,但无法访问聚集索引文件。我想知道它是什么样子,或者如何在 C# 中模拟它(因为这意味着对数据进行物理排序?)。

尝试如下代码:

using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Xml;
using System.Xml.Linq;
using System.Data;
using System.Xml.Serialization;
using System.IO;
namespace ConsoleApplication2
{
/// <summary>
/// Console demo that builds a Country / City / Name / ID XML document from an
/// in-memory table and times two ways of finding every row again:
/// a flat scan over all <c>ID</c> elements, and a walk through the
/// deserialized Country -> City -> Name object hierarchy.
/// </summary>
class Program
{
    /// <summary>
    /// Builds the sample data, runs both lookup simulations and prints the
    /// elapsed time of each to the console.
    /// </summary>
    /// <param name="args">Command-line arguments; not used.</param>
    static void Main(string[] args)
    {
        DataTable dt = new DataTable();
        dt.Columns.Add("ID", typeof(string));
        dt.Columns.Add("Name", typeof(string));
        dt.Columns.Add("City", typeof(string));
        dt.Columns.Add("Country", typeof(string));
        dt.Rows.Add(new object[] { "1", "Bronx", "New York", "USA" });
        dt.Rows.Add(new object[] { "2", "Queens", "New York", "USA" });
        dt.Rows.Add(new object[] { "3", "Hollywood", "Los Angeles", "USA" });
        dt.Rows.Add(new object[] { "4", "Circle Theater", "London", "Great_Britan" });
        dt.Rows.Add(new object[] { "5", "Eiffle Tower", "Paris", "France" });

        // The quotes inside the XML declaration must be escaped, otherwise
        // the literal ends early and the file does not compile.
        string ident = "<?xml version=\"1.0\" encoding=\"utf-8\"?><Database></Database>";
        XDocument doc = XDocument.Parse(ident);
        XElement database = doc.Root;

        // Nest the flat rows as Country -> City -> Name(ID).
        foreach (var country in dt.AsEnumerable().GroupBy(x => x.Field<string>("Country")))
        {
            XElement xCountry = new XElement("Country", country.Key);
            database.Add(xCountry);
            foreach (var city in country.GroupBy(x => x.Field<string>("City")))
            {
                XElement xCity = new XElement("City", city.Key);
                xCountry.Add(xCity);
                foreach (DataRow name in city.AsEnumerable())
                {
                    XElement xName = new XElement("Name", name.Field<string>("Name"), new XElement("ID", name.Field<string>("ID")));
                    xCity.Add(xName);
                }
            }
        }

        //simulation no indexing
        List<XElement> xIds = doc.Descendants("ID").ToList();
        // Stopwatch instead of DateTime.Now: the wall clock is too coarse
        // to measure a loop over a handful of rows.
        var timer = System.Diagnostics.Stopwatch.StartNew();
        foreach (DataRow row in dt.AsEnumerable())
        {
            string id = row.Field<string>("ID");
            foreach (XElement xId in xIds)
            {
                if (id == (string)xId)
                {
                    break;
                }
            }
        }
        timer.Stop();
        TimeSpan sim1 = timer.Elapsed;

        //simulation with indexing
        XmlSerializer serializer = new XmlSerializer(typeof(Database));
        Database db;
        using (StringReader sReader = new StringReader(doc.ToString()))
        {
            db = (Database)serializer.Deserialize(sReader);
        }

        timer.Restart();
        foreach (DataRow row in dt.AsEnumerable())
        {
            string country = row.Field<string>("Country");
            string city = row.Field<string>("City");
            string name = row.Field<string>("Name");

            // First(predicate) fails with a descriptive InvalidOperationException
            // on a miss instead of a NullReferenceException on the next line.
            Country cCountry = db.Country.First(x => x.name == country);
            City cCity = cCountry.City.First(x => x.name == city);
            Name cName = cCity.Name.First(x => x.name == name);
            string cID = cName.ID;
        }
        timer.Stop();
        TimeSpan sim2 = timer.Elapsed;

        // Report the measurements; previously they were computed and discarded.
        Console.WriteLine("Simulation without indexing: {0}", sim1);
        Console.WriteLine("Simulation with indexing:    {0}", sim2);
    }
}
/// <summary>
/// Serialization model for the root of the document; holds the countries.
/// </summary>
public class Database
{
    /// <summary>
    /// Countries, each read from / written as a repeated <c>Country</c>
    /// element directly under the root (no wrapping array element).
    /// </summary>
    [XmlElement("Country")]
    public List<Country> Country { get; set; }
}
/// <summary>
/// Serialization model for one <c>Country</c> element: its text plus its cities.
/// </summary>
public class Country
{
    /// <summary>Text content of the element, used as the country's name.</summary>
    [XmlText]
    public string name { get; set; }

    /// <summary>
    /// Cities, each mapped to a repeated <c>City</c> child element.
    /// </summary>
    [XmlElement("City")]
    public List<City> City { get; set; }
}
/// <summary>
/// Serialization model for one <c>City</c> element: its text plus its named entries.
/// </summary>
public class City
{
    /// <summary>Text content of the element, used as the city's name.</summary>
    [XmlText]
    public string name { get; set; }

    /// <summary>
    /// Entries, each mapped to a repeated <c>Name</c> child element.
    /// </summary>
    [XmlElement("Name")]
    public List<Name> Name { get; set; }
}
/// <summary>
/// Serialization model for one <c>Name</c> element: its text plus an <c>ID</c> child.
/// </summary>
public class Name
{
    /// <summary>Text content of the element, used as the entry's name.</summary>
    [XmlText]
    public string name { get; set; }

    /// <summary>Value of the <c>ID</c> child element.</summary>
    [XmlElement("ID")]
    public string ID { get; set; }
}
}

结果如下:

<Database>
<Country>
USA
<City>
New York
<Name>
Bronx
<ID>1</ID>
</Name>
<Name>
Queens
<ID>2</ID>
</Name>
</City>
<City>
Los Angeles
<Name>
Hollywood
<ID>3</ID>
</Name>
</City>
</Country>
<Country>
Great_Britan
<City>
London
<Name>
Circle Theater
<ID>4</ID>
</Name>
</City>
</Country>
<Country>
France
<City>
Paris
<Name>
Eiffle Tower
<ID>5</ID>
</Name>
</City>
</Country>
</Database>

.NET 中最接近聚集 B 树的数据结构是 SortedSet<T>:

从 .NET Framework 4 开始,SortedSet 类提供了一个自平衡树,在插入、删除和搜索之后仍按排序顺序维护数据。

https://learn.microsoft.com/en-us/dotnet/standard/collections/sorted-collection-types

您可以遍历 XML 文档,并将 XElement 项添加到带有自定义比较器的 SortedSet<XElement> 或 SortedSet<MyEntity> 中,以便按 ID 排序。

最新更新