我遇到这样一个场景:需要从API获取10000人的数据。不幸的是,API提供方不支持分页(🙁)。因此我在处理这种情况时必须非常小心。我要做的就是下载这些数据并将其保存在JArray中,以便对每个对象做进一步处理。
我尝试遵循各种最佳实践,使用HttpClient获取这份庞大的数据并存储到JArray中,但Newtonsoft.Json仍然抛出了System.OutOfMemoryException。
我的代码块是这样的-
/// <summary>
/// Requests the contact list from the API and loads the whole JSON response
/// into a single <see cref="JArray"/>.
/// </summary>
/// <param name="token">Connector token handed to GenearateHeaders to build the Authorization header.</param>
/// <returns>
/// The array parsed from the response, or an empty array when any exception
/// was raised (the exception is logged, not rethrown).
/// </returns>
public async Task<JArray> GetContactsObject(ConnectorToken token)
{
    JArray result = new JArray();

    try
    {
        using (HttpClient http = new HttpClient())
        {
            http.Timeout = TimeSpan.FromMinutes(10);

            const string requestUrl = "<api_uri>";
            string authorization = GenearateHeaders("GET", requestUrl, token);
            http.DefaultRequestHeaders.Add("Authorization", authorization);
            http.DefaultRequestHeaders.TryAddWithoutValidation("Accept", "application/json");

            // Parse straight from the response stream; the resulting JArray still
            // holds the complete document in memory.
            using (Stream body = await http.GetStreamAsync(requestUrl))
            using (StreamReader text = new StreamReader(body))
            using (JsonReader json = new JsonTextReader(text))
            {
                result = JArray.Load(json);
            }
        }
    }
    catch (Exception error)
    {
        _logger.Fatal(error);
    }

    return result;
}
我到底做错了什么?API响应是巨大的,它有10,000个联系人,响应大小约为70-80mb。
更新后的代码片段——仍然会抛出异常,但我认为已经有所改进:
/// <summary>
/// Requests the contact list from the API and reads the JSON response one
/// object at a time, adding each parsed object to the returned
/// <see cref="MailPlusContacts"/>.
/// </summary>
/// <param name="token">Connector token handed to GenearateHeaders to build the Authorization header.</param>
/// <returns>
/// The contacts collected so far; never null. If an exception is raised the
/// exception is logged and whatever was read before the failure is returned.
/// </returns>
public async Task<MailPlusContacts> GetContactsObject(ConnectorToken token)
{
    MailPlusContacts contacts = new MailPlusContacts();
    try
    {
        using (var client = new HttpClient())
        {
            client.Timeout = TimeSpan.FromMinutes(10);
            string requestUrl = "<api_uri>";
            client.DefaultRequestHeaders.Add("Authorization", GenearateHeaders("GET", requestUrl, token));
            client.DefaultRequestHeaders.TryAddWithoutValidation("Accept", "application/json");

            using (Stream s = await client.GetStreamAsync(requestUrl))
            using (StreamReader sr = new StreamReader(s))
            using (JsonTextReader reader = new JsonTextReader(sr))
            {
                while (reader.Read())
                {
                    if (reader.TokenType == JsonToken.StartObject)
                    {
                        // JObject.Load consumes the reader through the matching end of
                        // this object, so nested objects are not revisited by the loop.
                        JObject obj = JObject.Load(reader);

                        // Every object is still retained here, so memory use keeps
                        // growing with the size of the response.
                        contacts.Contacts.Add(obj);
                    }
                }
            }
        }
    }
    catch (Exception ex)
    {
        // This catch used to be empty, which hid every failure (including the
        // OutOfMemoryException being investigated). Log it the same way the
        // JArray-based version of this method does.
        _logger.Fatal(ex);
    }
    return contacts;
}
示例JSON(此处只列出1个对象,实际响应中约有10,000个这样的对象):
[
{
"externalId": "4D8C802F4DE244248D97E8C80F628AF3",
"created": 1471413089000,
"encryptedId": "bbhjZi4UYIiYnnZ",
"testGroup": false,
"lastChanged": 1476683221000,
"temporary": false,
"properties": {
"prop1": "11:09",
"birthdate": "1991-07-01",
"freeField1": "*1000000000*",
"freeField2": "Y",
"street": "Shivaji Nagar",
"houseNumber": "123",
"city": "Pune",
"list4": [
{
"description": "dropdown item",
"bit": 1,
"enabled": false
},
{
"description": "dropdown item 2",
"bit": 2,
"enabled": false
},
{
"description": "dropdown item 3",
"bit": 4,
"enabled": false
},
{
"description": "dropdown item 23-1-2016",
"bit": 8,
"enabled": false
},
{
"description": "dropdown item 2 23-1-2016",
"bit": 16,
"enabled": false
},
{
"description": "dropdown item 2 27-1-2016",
"bit": 32,
"enabled": false
},
{
"description": "Meeloopdagen",
"bit": 64,
"enabled": false
},
{
"description": "dropdown item 2 5-3-2016",
"bit": 128,
"enabled": false
},
{
"description": "dropdown item 5-3-2016",
"bit": 256,
"enabled": true
},
{
"description": "dropdown item 2 Chemie 1-3-2016",
"bit": 512,
"enabled": false
},
{
"description": "dropdown item 2 Chemie 8-3-2016",
"bit": 1024,
"enabled": false
},
{
"description": "dropdown item 23-4-2016",
"bit": 2048,
"enabled": false
},
{
"description": "dropdown item 2 23-4-2016",
"bit": 4096,
"enabled": false
}
],
"list5": [
{
"description": "Ja",
"bit": 1,
"enabled": true
},
{
"description": "Nee",
"bit": 2,
"enabled": false
}
],
"list2": [
{
"description": "Test Werk en Test",
"bit": 1,
"enabled": false
},
{
"description": "Test Test Test",
"bit": 2,
"enabled": false
},
{
"description": "Test Psychologie",
"bit": 4,
"enabled": false
},
{
"description": "Test Therapie",
"bit": 8,
"enabled": false
},
{
"description": "Test",
"bit": 16,
"enabled": false
},
{
"description": "Test",
"bit": 32,
"enabled": false
},
{
"description": "Bio-Test",
"bit": 64,
"enabled": false
},
{
"description": "Test",
"bit": 128,
"enabled": false
},
{
"description": "Test Economie",
"bit": 256,
"enabled": false
},
{
"description": "Test",
"bit": 512,
"enabled": false
},
{
"description": "HBO-Test",
"bit": 1024,
"enabled": false
},
{
"description": "Sociaal Test Test",
"bit": 2048,
"enabled": false
},
{
"description": "Human Test Test",
"bit": 4096,
"enabled": false
},
{
"description": "Test en Test",
"bit": 8192,
"enabled": false
},
{
"description": "Test",
"bit": 16384,
"enabled": false
}
],
"postalCode": "1201AX",
"list3": [
{
"description": "Test 1",
"bit": 1,
"enabled": false
},
{
"description": "Test 1",
"bit": 2,
"enabled": false
},
{
"description": "DTest 1",
"bit": 4,
"enabled": false
},
{
"description": "Test 1",
"bit": 8,
"enabled": false
},
{
"description": "LTest 1",
"bit": 16,
"enabled": false
},
{
"description": "Test 1",
"bit": 32,
"enabled": false
},
{
"description": "PTest 1",
"bit": 64,
"enabled": false
},
{
"description": "Test 1",
"bit": 128,
"enabled": false
},
{
"description": "STest 1",
"bit": 256,
"enabled": false
},
{
"description": "TTest 1",
"bit": 512,
"enabled": false
},
{
"description": "MTest 1",
"bit": 1024,
"enabled": false
},
{
"description": "KTest 1",
"bit": 2048,
"enabled": false
},
{
"description": "Test 1",
"bit": 4096,
"enabled": false
},
{
"description": "Test 1e",
"bit": 8192,
"enabled": false
},
{
"description": "Test 1",
"bit": 16384,
"enabled": false
},
{
"description": "Test 1n",
"bit": 32768,
"enabled": false
},
{
"description": "PTest 1jk",
"bit": 65536,
"enabled": false
},
{
"description": "BTest 1a",
"bit": 131072,
"enabled": false
},
{
"description": "BTest 1ek ",
"bit": 262144,
"enabled": false
},
{
"description": "Test 1",
"bit": 524288,
"enabled": false
},
{
"description": "Test 1",
"bit": 1048576,
"enabled": false
},
{
"description": "Test 1 Economie",
"bit": 2097152,
"enabled": false
},
{
"description": "Test ",
"bit": 4194304,
"enabled": true
},
{
"description": "HBO-Test",
"bit": 8388608,
"enabled": false
},
{
"description": "Test Test Test",
"bit": 16777216,
"enabled": false
},
{
"description": "Test in de Test",
"bit": 33554432,
"enabled": false
},
{
"description": "Test Resource Test",
"bit": 67108864,
"enabled": false
},
{
"description": "Test Test",
"bit": 134217728,
"enabled": false
}
],
"phoneNumber": "0793631212",
"initials": "Rahul",
"list1": [
{
"description": "Test 1",
"bit": 1,
"enabled": false
},
{
"description": "Test 2",
"bit": 2,
"enabled": false
},
{
"description": "Test 3",
"bit": 4,
"enabled": true
},
{
"description": "Test 4",
"bit": 8,
"enabled": false
},
{
"description": "Test 5",
"bit": 16,
"enabled": false
},
{
"description": "Test 6",
"bit": 32,
"enabled": false
},
{
"description": "tet7 ",
"bit": 64,
"enabled": false
},
{
"description": "Test 8",
"bit": 128,
"enabled": false
},
{
"description": "Test 1",
"bit": 256,
"enabled": false
},
{
"description": "Test 1",
"bit": 512,
"enabled": false
},
{
"description": "Test 1",
"bit": 1024,
"enabled": false
},
{
"description": "Test 1",
"bit": 2048,
"enabled": false
},
{
"description": "Test 1",
"bit": 4096,
"enabled": false
},
{
"description": "Test 1",
"bit": 8192,
"enabled": false
},
{
"description": "Test 1",
"bit": 16384,
"enabled": false
},
{
"description": "Test 1",
"bit": 32768,
"enabled": false
},
{
"description": "Test 1",
"bit": 65536,
"enabled": false
},
{
"description": "Test 1",
"bit": 131072,
"enabled": false
},
{
"description": "Test 1",
"bit": 262144,
"enabled": false
},
{
"description": "Test 1",
"bit": 524288,
"enabled": false
},
{
"description": "Test 1",
"bit": 1048576,
"enabled": true
},
{
"description": "Test 1 Economie",
"bit": 2097152,
"enabled": false
},
{
"description": "Test 1",
"bit": 4194304,
"enabled": false
},
{
"description": "HBO-Test 1",
"bit": 8388608,
"enabled": false
},
{
"description": "Test 1 ",
"bit": 16777216,
"enabled": false
},
{
"description": "Test 1",
"bit": 33554432,
"enabled": false
},
{
"description": "Test 1",
"bit": 67108864,
"enabled": false
},
{
"description": "Test 1",
"bit": 134217728,
"enabled": false
}
],
"gender": "M",
"firstName": "Rahul Patil",
"list6": [
{
"description": "Test",
"bit": 1,
"enabled": false
},
{
"description": "Test ",
"bit": 2,
"enabled": false
},
{
"description": "Test ",
"bit": 4,
"enabled": false
},
{
"description": "Test ",
"bit": 8,
"enabled": false
},
{
"description": "Test Test",
"bit": 16,
"enabled": false
},
{
"description": "Test",
"bit": 32,
"enabled": false
},
{
"description": "Test Test Test",
"bit": 64,
"enabled": false
},
{
"description": "Test-Test Test",
"bit": 128,
"enabled": false
},
{
"description": "Test in de Test",
"bit": 256,
"enabled": false
},
{
"description": "Test Test Test Test",
"bit": 512,
"enabled": false
},
{
"description": "Test Test",
"bit": 1024,
"enabled": false
},
{
"description": "Test Test en Test",
"bit": 2048,
"enabled": false
},
{
"description": "Test Therapie",
"bit": 4096,
"enabled": false
},
{
"description": "Test",
"bit": 8192,
"enabled": false
},
{
"description": "Test Resource Test",
"bit": 16384,
"enabled": false
},
{
"description": "HBO-Test",
"bit": 32768,
"enabled": false
},
{
"description": "Test",
"bit": 65536,
"enabled": false
},
{
"description": "Test Test",
"bit": 131072,
"enabled": false
},
{
"description": "Test",
"bit": 262144,
"enabled": false
},
{
"description": "Test",
"bit": 524288,
"enabled": false
},
{
"description": "Bio-Test",
"bit": 1048576,
"enabled": false
},
{
"description": "Test en Test Test",
"bit": 2097152,
"enabled": false
}
],
"date1": "2016-09-30",
"lastName": "Patil",
"organisation": "Test Organization",
"email": "john@doe.com",
"vrij15": "N",
"profileFields": [
{
"description": "testField",
"bit": 1,
"enabled": false
},
{
"description": "test1",
"bit": 2,
"enabled": true
}
],
"profileField2": "asd",
"numeric1": "10",
"profileField1": "asd",
"profileField3": "asd"
},
"channels": [
{
"name": "EMAIL",
"value": true
},
{
"name": "SMS",
"value": false
}
]
}
]
很可能需要手动拆分对象。在这种情况下,调用'JObject.Load(reader);'时所做的解析工作量非常大。
也可能是对象增长到一定规模后才出现的问题:对非常大的对象进行动态扩容可能会引发问题。
我敢打赌,先把它作为一个字符串(string)读取下来,效果会更好。
// Read the raw JSON from the body of the HTTP post as a single string, without parsing it.
// NOTE(review): `Request` is not declared in this snippet - presumably the HTTP
// message whose content carries the JSON; confirm against the calling code.
string results = await Request.Content.ReadAsStringAsync();
如果它首先是一个字符串,那么解析这个大字符串可能会更好。
但是您可能会发现它仍然太大,这时您将希望自己执行部分解析。用我所说的"有趣"的方式来做:
查找数组中第一个对象的起始位置,然后对花括号进行计数。示例:如果返回的数据是
{"employees":[
{"firstName":"John", "lastName":"Doe"},
{"firstName":"Anna", "lastName":"Smith"},
{"firstName":"Peter", "lastName":"Jones"}
]}
你想这样做:
// Extracts the first object of the array as its own string by counting braces.
// `results` is the raw JSON string read earlier.

// Length of the known wrapper prefix that precedes the array elements.
var headerIndex = "{\"employees\":[".Length;

// Clamp so IndexOf does not throw when the payload is shorter than the prefix.
var startIndex = results.IndexOf('{', Math.Min(headerIndex, results.Length));
if (startIndex != -1)
{
    var bracketcount = 0;
    var insideString = false;
    int i;

    // Walk forward from the opening brace until its matching closing brace.
    for (i = startIndex; i < results.Length; i++)
    {
        char c = results[i];

        if (insideString)
        {
            // Braces inside string values must not affect the nesting depth.
            if (c == '\\')
            {
                i++; // skip the escaped character (e.g. \" or \\)
            }
            else if (c == '"')
            {
                insideString = false;
            }
            continue;
        }

        if (c == '"')
        {
            insideString = true;
        }
        else if (c == '{')
        {
            bracketcount++;
        }
        else if (c == '}' && --bracketcount == 0)
        {
            break; // i now points at the brace that closes the first object
        }
    }

    // Only slice when a matching closing brace was actually found.
    if (i < results.Length)
    {
        var smallerObjectString = results.Substring(startIndex, i - startIndex + 1);
        //create a single object and handle it
    }
}
响应可能"只有" 80MB(这是一个巨大的文本响应),但是将其表示为内存中的对象层次结构会占用更多的空间。
Json.NET能够直接从流中反序列化,因此整个字符串不会被一次性加载到内存中。相反,最可能的罪魁祸首是您将所有内容都存储在单个contacts变量中,而JArray很可能会解析其中的每一个子节点。
这里有一些Json.NET的性能提示,可以帮助防止内存耗尽。