在 Vega-Lite 图表上添加回归线



我正试图在散点图上添加回归线和 R² 值。我知道应该使用图层(layer)功能,并配合如下的回归变换:

 "transform": [
                {
                    "regression": "GDP per capita",
                    "on": "Educationalattainment",
                }

但在尝试了无数次之后,我仍然不知道应该把这段代码插入到哪里。以下是我的图表代码:

{
  "$schema": "https://vega.github.io/schema/vega-lite/v4.json",
  "title": {
    "text": "GDP per capita and Education Attainment",
    "subtitle": "From 2015-2020. Sources: World Bank",
    "subtitleFontStyle": "italic",
    "subtitleFontSize": 10,
    "anchor": "start",
    "color": "black"
  },
  "height": 300,
  "width": 300,
  "data": {
    "url": "https://raw.githubusercontent.com/jamieprince/jamieprince.github.io/main/correlation.csv"
  },
  "transform": [
    {
      "calculate": "datum.Educationalattainment/100",
      "as": "percent"
    },
    {
      "filter": {
        "field": "Educationalattainment",
        "gt": 0
      }
    }
  ],
  "selection": {
    "paintbrush": {
      "type": "multi",
      "on": "mouseover",
      "nearest": true
    },
    "grid": {
      "type": "interval",
      "bind": "scales"
    }
  },
  "mark": {
    "type": "circle",
    "opacity": 0.5,
    "color": "#EC9D3E"
  },
  "encoding": {
    "x": {
      "field": "GDP per capita",
      "type": "quantitative",
      "axis": {
        "title": "GDP per capita",
        "grid": false,
        "tickCount": 10,
        "labelOverlap": "greedy"
      }
    },
    "y": {
      "field": "percent",
      "type": "quantitative",
      "axis": {
        "title": "Educational Attainment",
        "grid": false,
        "format": "%"
      }
    },
    "size": {
      "condition": {
        "selection": "paintbrush",
        "value": 300,
        "init": {
          "value": 70
        }
      },
      "value": 70
    },
    "tooltip": [
      {
        "field": "Year",
        "type": "nominal",
        "title": "Year"
      },
      {
        "field": "Country",
        "type": "ordinal",
        "title": "Country"
      },
      {
        "field": "GDP per capita",
        "type": "nominal",
        "title": "GDP per capita"
      },
      {
        "field": "Educationalattainment",
        "type": "nominal",
        "title": "Educational attainment at least completed short-cycle tertiary population 25+ total (%) (cumulative)"
      }
    ]
  }
}

这是我的参考图

{
  "$schema": "https://vega.github.io/schema/vega-lite/v5.json",
  "description": "Figure 5: Plotting a regression of Social Mobility Index on Global Entrepreneurship Index, equation acquired via Python",
  "data": {
    "url": "https://raw.githubusercontent.com/marinabrts/marinabrts.github.io/main/GEIxSMI.csv",
    "format": {
      "type": "csv"
    }
  },
  "background": "#E0E0E0",
  "config": {
    "axis": {
      "grid": true,
      "gridColor": "#FFFFFF"
    }
  },
  "title": {
    "text": "Figure 5: Regressing SMI on Global Entrepreneurship Index",
    "subtitle": "Source: World Economic Forum (2020), Global Entrepreneurship & Development Institute (2019)",
    "subtitleFontStyle": "italic",
    "subtitleFontSize": 10,
    "anchor": "start"
  },
  "height": 300,
  "width": 370,
  "layer": [
    {
      "mark": {
        "type": "point",
        "size": 30,
        "color": "#FF3399"
      },
      "encoding": {
        "x": {
          "field": "GEI",
          "type": "quantitative",
          "title": "Global Entrepreneurship Index (GEI)"
        },
        "y": {
          "field": "Index Score",
          "type": "quantitative",
          "title": "Social Mobility Index (SMI)",
          "scale": {
            "domain": [
              30,
              90
            ]
          }
        },
        "tooltip": [
          {
            "field": "Country",
            "type": "nominal",
            "title": "Country"
          },
          {
            "field": "GEI",
            "type": "quantitative",
            "title": "GEI"
          },
          {
            "field": "Index Score",
            "type": "quantitative",
            "title": "SMI"
          }
        ]
      }
    },
    {
      "mark": {
        "type": "line",
        "color": "#7F00FF",
        "size": 3
      },
      "transform": [
        {
          "regression": "Index Score",
          "on": "GEI"
        }
      ],
      "encoding": {
        "x": {
          "field": "GEI",
          "type": "quantitative"
        },
        "y": {
          "field": "Index Score",
          "type": "quantitative"
        }
      }
    },
    {
      "transform": [
        {
          "regression": "Index Score",
          "on": "GEI",
          "params": true
        },
        {
          "calculate": "'R²= '+format(datum.rSquared, '.2f')",
          "as": "R2"
        }
      ],
      "mark": {
        "type": "text",
        "color": "red",
        "size": 14,
        "x": "width",
        "align": "center",
        "y": -5
      },
      "encoding": {
        "text": {
          "type": "nominal",
          "field": "R2"
        }
      }
    }
  ]
}

如果有任何帮助,我将不胜感激。非常感谢。

编辑

R平方值代码

{
  "transform": [
    {
      "regression": "GDP per capita",
      "on": "percent",
      "params": true
    },
    {
      "calculate": "'R²: '+format(datum.rSquared, '.2f')",
      "as": "R2"
    }
  ],
  "mark": {
    "type": "text",
    "color": "black",
    "x": "width",
    "align": "right",
    "y": -5
  },
  "encoding": {
    "text": {
      "type": "nominal",
      "field": "R2"
    }
  }
}

以下是完整的图表代码,其中 R² 值没有显示出来:

{
  "$schema": "https://vega.github.io/schema/vega-lite/v4.json",
  "title": {
    "text": null,
    "subtitle": null,
    "subtitleFontStyle": "italic",
    "subtitleFontSize": 10,
    "anchor": "start",
    "color": "black"
  },
  "height": 100,
  "width": 100,
  "data": {
    "url": "https://raw.githubusercontent.com/jamieprince/jamieprince.github.io/main/correlation.csv"
  },
  "transform": [
    {
      "calculate": "datum.Educationalattainment/100",
      "as": "percent"
    },
    {
      "filter": {
        "field": "Educationalattainment",
        "gt": 0
      }
    }
  ],
  "layer": [
    {
      "selection": {
        "paintbrush": {
          "type": "multi",
          "on": "mouseover",
          "nearest": true
        },
        "grid": {
          "type": "interval",
          "bind": "scales"
        }
      },
      "mark": {
        "type": "circle",
        "opacity": 0.5,
        "color": "#EC9D3E"
      },
      "encoding": {
        "x": {
          "field": "GDP per capita",
          "type": "quantitative",
          "axis": {
            "title": "GDP per capita",
            "grid": false,
            "tickCount": 10,
            "labelOverlap": "greedy"
          }
        },
        "y": {
          "field": "percent",
          "type": "quantitative",
          "axis": {
            "title": "Educational Attainment",
            "grid": false,
            "format": "%"
          }
        },
        "size": {
          "condition": {
            "selection": "paintbrush",
            "value": 300,
            "init": {
              "value": 70
            }
          },
          "value": 70
        },
        "tooltip": [
          {
            "field": "Year",
            "type": "nominal",
            "title": "Year"
          },
          {
            "field": "Country",
            "type": "ordinal",
            "title": "Country"
          },
          {
            "field": "GDP per capita",
            "type": "nominal",
            "title": "GDP per capita"
          },
          {
            "field": "Educationalattainment",
            "type": "nominal",
            "title": "Educational attainment at least completed short-cycle tertiary population 25+ total (%) (cumulative)"
          }
        ]
      }
    },
    {
      "mark": {
        "type": "line",
        "color": "#347DB6",
        "size": 3
      },
      "transform": [
        {
          "regression": "GDP per capita",
          "on": "percent"
        }
      ],
      "encoding": {
        "x": {
          "field": "GDP per capita",
          "type": "quantitative"
        },
        "y": {
          "field": "percent",
          "type": "quantitative"
        }
      }
    },
    {
      "transform": [
        {
          "regression": "GDP per capita",
          "on": "percent",
          "params": true
        },
        {
          "calculate": "'R²: '+format(datum.rSquared, '.2f')",
          "as": "R2"
        }
      ],
      "mark": {
        "type": "text",
        "color": "black",
        "x": "width",
        "align": "right",
        "y": -5
      },
      "encoding": {
        "text": {
          "type": "nominal",
          "field": "R2"
        }
      }
    }
  ]
}

您只需把散点图(circle 标记)和 line 标记放进同一个 layer 数组中,它们就会叠加显示在一起。然后,在 line 标记所在的图层上执行 regression 变换。您问题中给出的变换似乎有误:x 和 y 编码都没有使用 Educationalattainment 字段,所以我改为对 percent 字段做回归,因为它是由 Educationalattainment 字段计算得出的。您原来的变换如下:

"transform": [
                {
                    "regression": "GDP per capita",
                    "on": "Educationalattainment",
                }]

以下是修改后的配置,也可以参考在线编辑器:

{
  "$schema": "https://vega.github.io/schema/vega-lite/v4.json",
  "title": {
    "text": null,
    "subtitle": null,
    "subtitleFontStyle": "italic",
    "subtitleFontSize": 10,
    "anchor": "start",
    "color": "black"
  },
  "height": 100,
  "width": 100,
  "data": {
    "url": "https://raw.githubusercontent.com/jamieprince/jamieprince.github.io/main/correlation.csv"
  },
  "transform": [
    {
      "calculate": "datum.Educationalattainment/100",
      "as": "percent"
    },
    {
      "filter": {
        "field": "Educationalattainment",
        "gt": 0
      }
    }
  ],
  "layer": [
    {
      "selection": {
        "paintbrush": {
          "type": "multi",
          "on": "mouseover",
          "nearest": true
        },
        "grid": {
          "type": "interval"
        }
      },
      "mark": {
        "type": "circle",
        "opacity": 0.5,
        "color": "#EC9D3E"
      },
      "encoding": {
        "x": {
          "field": "GDP per capita",
          "type": "quantitative",
          "axis": {
            "title": "GDP per capita",
            "grid": false,
            "tickCount": 10,
            "labelOverlap": "greedy"
          }
        },
        "y": {
          "field": "percent",
          "type": "quantitative",
          "axis": {
            "title": "Educational Attainment",
            "grid": false,
            "format": "%"
          }
        },
        "size": {
          "condition": {
            "selection": "paintbrush",
            "value": 300,
            "init": {
              "value": 70
            }
          },
          "value": 70
        },
        "tooltip": [
          {
            "field": "Year",
            "type": "nominal",
            "title": "Year"
          },
          {
            "field": "Country",
            "type": "ordinal",
            "title": "Country"
          },
          {
            "field": "GDP per capita",
            "type": "nominal",
            "title": "GDP per capita"
          },
          {
            "field": "Educationalattainment",
            "type": "nominal",
            "title": "Educational attainment at least completed short-cycle tertiary population 25+ total (%) (cumulative)"
          }
        ]
      }
    },
    {
      "mark": {
        "type": "line",
        "color": "#347DB6",
        "size": 3
      },
      "transform": [
        {
          "regression": "GDP per capita",
          "on": "percent"
        }
      ],
      "encoding": {
        "x": {
          "field": "GDP per capita",
          "type": "quantitative"
        },
        "y": {
          "field": "percent",
          "type": "quantitative"
        }
      }
    },
    {
      "transform": [
        {
          "regression": "GDP per capita",
          "on": "percent",
          "params": true
        },
        {
          "calculate": "'R²: '+format(datum.rSquared, '.2f')",
          "as": "R2"
        }
      ],
      "mark": {
        "type": "text",
        "color": "black",
        "x": "width",
        "align": "right",
        "y": -5
      },
      "encoding": {
        "text": {
          "type": "nominal",
          "field": "R2"
        }
      }
    }
  ]
}

编辑

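为了让文本显示出来,我从您的 grid 选择中删除了 bind 配置,删除之后文本可以正常显示。这可能是 Vega-Lite 的一个问题,也可能另有原因——一个可能的解释是:当区间选择绑定到比例尺(bind: scales)时,标记会被自动裁剪到绘图区域之内,而位于 y: -5 的文本恰好落在绘图区域之外,因此被裁掉了。我更新了上面代码片段中的以下几行: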

"selection": {
        "paintbrush": {"type": "multi", "on": "mouseover", "nearest": true},
        "grid": {"type": "interval"}
      },

最新更新