使用 COPY 将 .json 文件导入到 PostgreSQL 表中



我想将一些天气数据(温度、风速等(导入到 PostgreSQL 11 表中,这些数据全部格式化为 JSON 文件,以便我可以对该数据进行查询。

我已经能够手动将一些数据插入到表中,但这没关系,因为它是少量数据,我计划之后使用更多数据。这是我使用 INSERT 函数发现的:https://datavirtuality.com/blog-json-in-postgresql/。

这就是为什么我一直在尝试使用 COPY 功能但到目前为止没有运气,即使在互联网上阅读了很多不同来源的东西之后......

JSON 文件可在那里下载:https://queueresults.meteoblue.com/F2637B90-45BB-4E7A-B47C-C34CD56674B3(如果该文件不再存在,请告诉我(。

我已经能够将 JSON 文件作为文本导入到表中:

create table temp_json (values text);
copy temp_json from '/home/cae/test.json';

但我不认为这是以后能够进行有效查询的最佳方法......

我通常在测试过程中遇到以下错误:

ERROR:  invalid input syntax for type json
DETAIL:  The input string ended unexpectedly.
CONTEXT:  JSON data, line 1: [

好像我无法在 PostgreSQL 中正确解析 JSON 文件和数组......

感谢您的帮助!

编辑:这是JSON文件的内容:

[
{
"geometry": {
"type": "MultiPoint",
"locationNames": [
"59.4°N/24.7°E31.7m",
"59.4°N/24.8°E36.4m"
],
"coordinates": [
[
24.7,
59.4,
31.73
],
[
24.8,
59.4,
36.445
]
]
},
"domain": "NEMS12",
"codes": [
{
"unit": "°C",
"dataPerTimeInterval": [
{
"data": [
[
-0.395,
-0.195,
-0.099999994,
-0.030000001,
-0.060000002,
-0.099999994,
-0.099999994,
0.005,
-0.055,
0.19,
0.48,
0.725,
1.88,
1.88,
1.855,
1.935,
2.1950002,
2.595,
3.3049998,
4.115,
3.37,
2.97,
3.32,
3.5149999,
3.56,
3.44,
3.355,
3.3600001,
3.32,
3.32,
3.4250002,
3.42,
3.3899999,
3.445,
3.3200002,
3.0549998,
4.58,
4.01,
3.02,
2.79,
2.75,
2.76,
2.855,
2.99,
2.96,
2.775,
2.595,
2.4250002
],
[
-0.49,
-0.26,
-0.16,
-0.09,
-0.1,
-0.13,
-0.12,
0.01,
-0.07,
0.17,
0.44,
0.66,
1.84,
1.85,
1.83,
1.9,
2.15,
2.55,
3.27,
4.11,
3.46,
2.96,
3.31,
3.5,
3.55,
3.42,
3.33,
3.34,
3.29,
3.29,
3.43,
3.44,
3.42,
3.52,
3.41,
3.11,
4.53,
4,
3.01,
2.79,
2.76,
2.77,
2.87,
3,
2.93,
2.71,
2.53,
2.38
]
],
"gapFillRatio": 0
}
],
"level": "2 m above gnd",
"aggregation": "none",
"code": 11,
"variable": "Temperature"
}
],
"timeIntervals": [
[
"20180101T0000",
"20180101T0100",
"20180101T0200",
"20180101T0300",
"20180101T0400",
"20180101T0500",
"20180101T0600",
"20180101T0700",
"20180101T0800",
"20180101T0900",
"20180101T1000",
"20180101T1100",
"20180101T1200",
"20180101T1300",
"20180101T1400",
"20180101T1500",
"20180101T1600",
"20180101T1700",
"20180101T1800",
"20180101T1900",
"20180101T2000",
"20180101T2100",
"20180101T2200",
"20180101T2300",
"20180102T0000",
"20180102T0100",
"20180102T0200",
"20180102T0300",
"20180102T0400",
"20180102T0500",
"20180102T0600",
"20180102T0700",
"20180102T0800",
"20180102T0900",
"20180102T1000",
"20180102T1100",
"20180102T1200",
"20180102T1300",
"20180102T1400",
"20180102T1500",
"20180102T1600",
"20180102T1700",
"20180102T1800",
"20180102T1900",
"20180102T2000",
"20180102T2100",
"20180102T2200",
"20180102T2300"
]
],
"timeResolution": "hourly"
},
{
"geometry": {
"coordinates": [
[
24.7,
59.4,
31.73
],
[
24.8,
59.4,
36.445
]
],
"locationNames": [
"59.4°N/24.7°E31.7m",
"59.4°N/24.8°E36.4m"
],
"type": "MultiPoint"
},
"domain": "NEMS12",
"codes": [
{
"unit": "°C",
"aggregation": "none",
"code": 11,
"level": "1000 mb",
"dataPerTimeInterval": [
{
"data": [
[
-0.585,
-0.265,
-0.055,
0.04,
0.044999998,
0.08,
0.11,
0.205,
0.13499999,
0.43,
0.84000003,
1.2,
2.1,
2.33,
2.5,
2.72,
3.1750002,
3.775,
4.915,
5.37,
4.16,
3.795,
4.1949997,
4.41,
4.415,
4.275,
4.1800003,
4.16,
4.0950003,
4.08,
4.185,
4.1,
3.98,
3.575,
3.22,
2.92,
4.395,
3.7649999,
2.895,
2.66,
2.6550002,
2.72,
2.845,
2.955,
2.89,
2.685,
2.54,
2.355
],
[
-0.64,
-0.29,
-0.08,
0.01,
0.03,
0.08,
0.12,
0.24,
0.14,
0.4,
0.8,
1.13,
2.11,
2.34,
2.52,
2.74,
3.19,
3.82,
4.91,
5.45,
4.29,
3.81,
4.19,
4.42,
4.43,
4.28,
4.17,
4.15,
4.08,
4.06,
4.18,
4.12,
4.01,
3.66,
3.31,
2.97,
4.38,
3.79,
2.9,
2.68,
2.68,
2.75,
2.89,
2.99,
2.88,
2.64,
2.43,
2.27
]
],
"gapFillRatio": 0
}
],
"variable": "Temperature"
}
],
"timeIntervals": [
[
"20180101T0000",
"20180101T0100",
"20180101T0200",
"20180101T0300",
"20180101T0400",
"20180101T0500",
"20180101T0600",
"20180101T0700",
"20180101T0800",
"20180101T0900",
"20180101T1000",
"20180101T1100",
"20180101T1200",
"20180101T1300",
"20180101T1400",
"20180101T1500",
"20180101T1600",
"20180101T1700",
"20180101T1800",
"20180101T1900",
"20180101T2000",
"20180101T2100",
"20180101T2200",
"20180101T2300",
"20180102T0000",
"20180102T0100",
"20180102T0200",
"20180102T0300",
"20180102T0400",
"20180102T0500",
"20180102T0600",
"20180102T0700",
"20180102T0800",
"20180102T0900",
"20180102T1000",
"20180102T1100",
"20180102T1200",
"20180102T1300",
"20180102T1400",
"20180102T1500",
"20180102T1600",
"20180102T1700",
"20180102T1800",
"20180102T1900",
"20180102T2000",
"20180102T2100",
"20180102T2200",
"20180102T2300"
]
],
"timeResolution": "hourly"
},
{
"geometry": {
"type": "MultiPoint",
"locationNames": [
"59.4°N/24.7°E31.7m",
"59.4°N/24.8°E36.4m"
],
"coordinates": [
[
24.7,
59.4,
31.73
],
[
24.8,
59.4,
36.445
]
]
},
"domain": "NEMS12",
"codes": [
{
"unit": "°C",
"dataPerTimeInterval": [
{
"data": [
[
-7.0950003,
-6.615,
-4.815,
-3.55,
-2.6750002,
-2.1950002,
-2.695,
-2.87,
-2.1399999,
-0.995,
0.1,
1,
0.335,
0.38,
-0.030000001,
-0.8,
-0.18,
0.575,
1.11,
-0.32999998,
-1.03,
-2.31,
-3.09,
-3.7350001,
-3.93,
-3.905,
-3.92,
-3.71,
-3.625,
-3.195,
-3.7,
-3.32,
-3.72,
-3.915,
-3.93,
-3.605,
-4.315,
-3.8899999,
-3.815,
-3.38,
-3.2150002,
-3.27,
-3.435,
-3.47,
-3.43,
-3.37,
-3.44,
-3.51
],
[
-7.11,
-6.73,
-4.94,
-3.57,
-2.7,
-2.15,
-2.62,
-2.91,
-2.22,
-1.1,
0.03,
0.9,
0.36,
0.37,
0.11,
-0.74,
-0.13,
0.59,
1.19,
-0.19,
-0.95,
-2.18,
-3.08,
-3.68,
-3.97,
-3.94,
-3.93,
-3.69,
-3.63,
-3.27,
-3.7,
-3.32,
-3.68,
-3.9,
-3.97,
-3.6,
-4.29,
-3.92,
-3.8,
-3.37,
-3.24,
-3.28,
-3.42,
-3.44,
-3.39,
-3.35,
-3.37,
-3.44
]
],
"gapFillRatio": 0
}
],
"level": "850 mb",
"code": 11,
"aggregation": "none",
"variable": "Temperature"
}
],
"timeResolution": "hourly",
"timeIntervals": [
[
"20180101T0000",
"20180101T0100",
"20180101T0200",
"20180101T0300",
"20180101T0400",
"20180101T0500",
"20180101T0600",
"20180101T0700",
"20180101T0800",
"20180101T0900",
"20180101T1000",
"20180101T1100",
"20180101T1200",
"20180101T1300",
"20180101T1400",
"20180101T1500",
"20180101T1600",
"20180101T1700",
"20180101T1800",
"20180101T1900",
"20180101T2000",
"20180101T2100",
"20180101T2200",
"20180101T2300",
"20180102T0000",
"20180102T0100",
"20180102T0200",
"20180102T0300",
"20180102T0400",
"20180102T0500",
"20180102T0600",
"20180102T0700",
"20180102T0800",
"20180102T0900",
"20180102T1000",
"20180102T1100",
"20180102T1200",
"20180102T1300",
"20180102T1400",
"20180102T1500",
"20180102T1600",
"20180102T1700",
"20180102T1800",
"20180102T1900",
"20180102T2000",
"20180102T2100",
"20180102T2200",
"20180102T2300"
]
]
},
{
"geometry": {
"type": "MultiPoint",
"locationNames": [
"59.4°N/24.7°E31.7m",
"59.4°N/24.8°E36.4m"
],
"coordinates": [
[
24.7,
59.4,
31.73
],
[
24.8,
59.4,
36.445
]
]
},
"domain": "NEMS12",
"codes": [
{
"unit": "°C",
"dataPerTimeInterval": [
{
"data": [
[
-10.84,
-12,
-10.280001,
-8.865,
-8.5,
-7.7,
-7.5699997,
-7.655,
-8.434999,
-8.844999,
-8.700001,
-7.1549997,
-9.555,
-10.004999,
-7.885,
-8.32,
-8.370001,
-8.915,
-9.53,
-10.225,
-10.934999,
-11.12,
-11.434999,
-11.575,
-11.965,
-11.64,
-12.12,
-12.345,
-12.34,
-12.48,
-12.844999,
-13.174999,
-13.18,
-13.219999,
-13.434999,
-13.305,
-12.775,
-12.745,
-12.79,
-12.75,
-12.690001,
-12.77,
-12.77,
-12.76,
-12.67,
-12.605,
-12.635,
-12.695
],
[
-10.74,
-11.94,
-10.54,
-8.77,
-8.56,
-7.75,
-7.52,
-7.53,
-8.24,
-8.95,
-8.77,
-7.15,
-9.48,
-10.03,
-7.88,
-8.24,
-8.35,
-8.82,
-9.4,
-10.08,
-10.84,
-11.04,
-11.3,
-11.5,
-11.9,
-11.6,
-12.09,
-12.31,
-12.39,
-12.48,
-12.83,
-13.16,
-13.2,
-13.19,
-13.4,
-13.3,
-12.77,
-12.7,
-12.78,
-12.71,
-12.66,
-12.73,
-12.73,
-12.72,
-12.62,
-12.57,
-12.6,
-12.67
]
],
"gapFillRatio": 0
}
],
"code": 11,
"level": "700 mb",
"aggregation": "none",
"variable": "Temperature"
}
],
"timeResolution": "hourly",
"timeIntervals": [
[
"20180101T0000",
"20180101T0100",
"20180101T0200",
"20180101T0300",
"20180101T0400",
"20180101T0500",
"20180101T0600",
"20180101T0700",
"20180101T0800",
"20180101T0900",
"20180101T1000",
"20180101T1100",
"20180101T1200",
"20180101T1300",
"20180101T1400",
"20180101T1500",
"20180101T1600",
"20180101T1700",
"20180101T1800",
"20180101T1900",
"20180101T2000",
"20180101T2100",
"20180101T2200",
"20180101T2300",
"20180102T0000",
"20180102T0100",
"20180102T0200",
"20180102T0300",
"20180102T0400",
"20180102T0500",
"20180102T0600",
"20180102T0700",
"20180102T0800",
"20180102T0900",
"20180102T1000",
"20180102T1100",
"20180102T1200",
"20180102T1300",
"20180102T1400",
"20180102T1500",
"20180102T1600",
"20180102T1700",
"20180102T1800",
"20180102T1900",
"20180102T2000",
"20180102T2100",
"20180102T2200",
"20180102T2300"
]
]
},
{
"geometry": {
"type": "MultiPoint",
"locationNames": [
"59.4°N/24.7°E",
"59.4°N/24.8°E"
],
"coordinates": [
[
24.7,
59.4,
"NaN"
],
[
24.8,
59.4,
"NaN"
]
]
},
"domain": "CAMSGLOBAL",
"codes": [
{
"unit": "",
"dataPerTimeInterval": [
{
"data": [
[
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN"
],
[
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN",
"NaN"
]
],
"gapFillRatio": 0
}
],
"code": 706,
"level": "sfc",
"aggregation": "none",
"variable": "Air Quality Index"
}
],
"timeResolution": "hourly",
"timeIntervals": [
[
"20180101T0000",
"20180101T0100",
"20180101T0200",
"20180101T0300",
"20180101T0400",
"20180101T0500",
"20180101T0600",
"20180101T0700",
"20180101T0800",
"20180101T0900",
"20180101T1000",
"20180101T1100",
"20180101T1200",
"20180101T1300",
"20180101T1400",
"20180101T1500",
"20180101T1600",
"20180101T1700",
"20180101T1800",
"20180101T1900",
"20180101T2000",
"20180101T2100",
"20180101T2200",
"20180101T2300",
"20180102T0000",
"20180102T0100",
"20180102T0200",
"20180102T0300",
"20180102T0400",
"20180102T0500",
"20180102T0600",
"20180102T0700",
"20180102T0800",
"20180102T0900",
"20180102T1000",
"20180102T1100",
"20180102T1200",
"20180102T1300",
"20180102T1400",
"20180102T1500",
"20180102T1600",
"20180102T1700",
"20180102T1800",
"20180102T1900",
"20180102T2000",
"20180102T2100",
"20180102T2200",
"20180102T2300"
]
]
}
]

给定您的第一个示例,您可以像这样处理它,将 json 数组分成单独的对象,并将它们作为单独的行填充到表中:

create table real_json as select value::jsonb from temp_json join lateral json_array_elements(values::json) on true;

但是,这取决于大型单个 json 对象始终足够小,可以舒适地放入您愿意使用的内存量中,这似乎是一个可疑的提议。 您需要一个对 JSON 对象进行增量或流式解析的库,一次返回一个二级对象,然后从返回的内存中清除它。 我不认为PostgreSQL提供了这样的功能。 如果您让我们知道您最喜欢的编程语言是什么,也许有人可以提出一个特定的库。

或者,您可以编写一个快速而脏的脚本,该脚本基于"漂亮"文件的缩进始终正确的假设将 JSON 划分为单独的记录的行,因此使用"^ [{}]"作为标记,然后去除换行符以反转"漂亮"格式,以便每条记录都是一行。 如果你有这样的脚本,你可以做:

copy real_json FROM PROGRAM 'unnest_top_array_and_depretty /home/cae/test_without_new_lines.json';

与真正的工作命令行工具相同的@jjanes代码。

copy json_table FROM PROGRAM 'jq --stream -nc -f myfile.json';

从文件中删除"漂亮的格式"有助于使用 COPY 函数,但它将文件的全部内容放在一行中,因此无法对现有列运行简单的 SELECT 查询......

这是我使用的:

CREATE TEMP TABLE target(data jsonb);
copy target from '/home/cae/test_without_new_lines.json';

最新更新