我有一个控制台应用程序,它从instagram url中获取json数据,如下所示:https://instagram.com/{profileName}";
因此,我需要查询所有具有非null instagram用户名的行,然后每行通过公共api进行IG查找,以获取IG帐户id并更新相应的db行。
我有两段sql代码,它从表中进行全选,如果InstagramId为空,则输出它们,另一个查询是基于instagram用户名更新instagram ID。
我该如何对数据库中的每个instagram用户名进行批量搜索和更新,并将所有相应的instagram ID添加到其中?
public static async Task<InstagramUser> ScrapeInstagram(string url)
{
using (var client = new HttpClient())
{
var response = await client.GetAsync(url);
if (response.IsSuccessStatusCode)
{
// create html document
var htmlBody = await response.Content.ReadAsStringAsync();
var htmlDocument = new HtmlDocument();
htmlDocument.LoadHtml(htmlBody);
// select script tags
var scripts = htmlDocument.DocumentNode.SelectNodes("/html/body/script");
// preprocess result
var uselessString = "window._sharedData = ";
var scriptInnerText = scripts[0].InnerText
.Substring(uselessString.Length)
.Replace(";", "");
// serialize objects and fetch the user data
dynamic jsonStuff = JObject.Parse(scriptInnerText);
dynamic userProfile = jsonStuff["entry_data"]["ProfilePage"][0]["graphql"]["user"];
//Update database query
string connectionString = @"Server=MyProject-dev-db.cothtpanmcn7.ap-southeast-2.rds.amazonaws.com;Database=UserApp;User Id=testadmin;Password=test123;MultipleActiveResultSets=true;Trusted_Connection=False;";
using (SqlConnection con = new SqlConnection(connectionString))
{
//select all users with null value
SqlCommand select = new SqlCommand("Select * from ApplicationUser where InstagramId is null");
//Updates InstagramID based from user input of instagramUsername
SqlCommand cmd = new SqlCommand("Update ApplicationUser Set InstagramId = '" + userProfile.id + "'" + "where Instagram = '" + userProfile.username + "'", con);
cmd.Connection.Open();
cmd.ExecuteNonQuery();
}
// create an InstagramUser
var instagramUser = new InstagramUser
{
FullName = userProfile.full_name,
FollowerCount = userProfile.edge_followed_by.count,
FollowingCount = userProfile.edge_follow.count,
Id = userProfile.id,
url = url
};
return instagramUser;
}
else
{
throw new Exception($"Something wrong happened {response.StatusCode} - {response.ReasonPhrase} - {response.RequestMessage}");
}
}
}
您可以这样做。
- 获取所有没有InstagramId的用户
- 循环通过用户,并调用API
- 将数据从API映射到ApplicationUser
- 在SQL Server中创建临时表
- 在Temp表中插入数据(在我的示例中使用大容量复制(
- 使用临时表与ApplicationUser联接以更新大量行
伪代码的种类:
var applicationUsers = await GetAllApplicationUsersWithoutInstagramId(); // Get the records from the database
foreach(var user in applicationUsers)
{
var instagramInfo = await GetInstagramInfo(user ); // Get the data from the API
u.InstagramId = instagramInfo.Id;
// Map other properties
});
using (SqlConnection con = new SqlConnection(connectionString))
{
await con.OpenAsync();
var transaction = con.BeginTransaction();
// Create a temp table
var cmd = new SqlCommand("CREATE #NewData(InstagramUsername NVARCHAR(255) NOT NULL PRIMARY KEY, InstagramId BIGINT NOT NULL /*Other columns*/)", con, transaction);
await cmd.ExecuteNonQueryAsycn();
// Create a data table to use with the SqlBulkCopy
var table = new DataTable();
foreach (var applicationUser in applicationUsers)
{
var row = table.NewRow();
row["InstagramUsername"] = applicationUser.Instagram;
row["InstagramId "] = applicationUser.InstagramId;
/*Other columns*/
table.Rows.Add(row);
}
// Bulk insert the new data into the temp table
using var bulkCopy = new SqlBulkCopy(con, SqlBulkCopyOptions.Default, transaction)
{
DestinationTableName = "#NewData",
BulkCopyTimeout = 3600 // 1 hour
};
await bulkCopy.WriteToServerAsync(table);
// Perform the update
cmd = new SqlCommand(@"
UPDATE au
SET au.InstagramId = nd.InstagramId
/*Other columns*/
FROM ApplicationUser au
INNER JOIN #NewData nd on au.Instagram = nd.InstagramUsername
", con,transaction);
await cmd.ExecuteNonQueryAsync();
await transaction.CommitAsync();
}
试试这个代码
public async Task<int> DeleteMultiple(int[] ids)
{
return await _factoryRepository.BatchUpdateAsync
(a => new Factory
{
IsDeleted = true;
}, a =>
ids.Contains(a.Id));
});
}