输入不匹配"when" Pyspark,但语句工作的其他情况



当我在jupyter Pyspark环境中运行以下代码时:

max(case when 
lower(drugcode.standard.primaryDisplay) like '%onpattro%'
or lower(drugcode.standard.primaryDisplay) like '%patisiran%'
or (drugcode.standard.CodingSystemid ='2.16.840.1.113883.6.314'
and lower(drugcode.standard.id) in ('d08858'))
or (drugcode.standard.CodingSystemid ='2.16.840.1.113883.6.312'
and lower(drugcode.standard.id) in ('287393', '287396', '287397,'287399','308546'))
or (drugcode.standard.CodingSystemid ='2.16.840.1.113883.6.69'
and lower(drugcode.standard.id) in ('71336100001'))
or (drugcode.standard.CodingSystemid ='2.16.840.1.113883.6.88'
and lower(drugcode.standard.id) in ('2053490','2053521','2053522','2053523','2053524','2053525','2053526','2053527','2053528','2053529','2053530','2053531','2284712','2284714'))
or (drugcode.standard.CodingSystemid ='2.16.840.1.113883.6.96'
and lower(drugcode.standard.id) in ('37498111000001109','773605008','773608005')) then 1 else 0 end) as on_ind,

我得到以下错误:

Py4JJavaError: An error occurred while calling o42.sql.
: org.apache.spark.sql.catalyst.parser.ParseException: 
mismatched input 'when' expecting {')', ','}(line 10, pos 9)

`== SQL ==
'select
tenant as med_tenant, 
personid as med_personid, 
medicationid as med_medicationid, 
encounterid as med_encounterid,
drugcode.standard.codingSystemId as codingSystemId,
drugcode.standard.id as drugCode,
drugcode.standard.primaryDisplay as drugDesc,
max(case when lower(drugcode.standard.primaryDisplay) like '%onpattro%'
---------^^^
or lower(drugcode.standard.primaryDisplay) like '%patisiran%'
or (drugcode.standard.CodingSystemid ='2.16.840.1.113883.6.314'
and lower(drugcode.standard.id) in ('d08858'))
or (drugcode.standard.CodingSystemid ='2.16.840.1.113883.6.312'
and lower(drugcode.standard.id) in ('287393', '287396', '287397,'287399','308546'))
or (drugcode.standard.CodingSystemid ='2.16.840.1.113883.6.69'
and lower(drugcode.standard.id) in ('71336100001'))
or (drugcode.standard.CodingSystemid ='2.16.840.1.113883.6.88'
and lower(drugcode.standard.id) in ('2053490','2053521','2053522','2053523','2053524','2053525','2053526','2053527','2053528','2053529','2053530','2053531','2284712','2284714'))
or (drugcode.standard.CodingSystemid ='2.16.840.1.113883.6.96'
and lower(drugcode.standard.id) in ('37498111000001109','773605008','773608005')) then 1 else 0 end) as on_ind,
max(case when
lower(drugcode.standard.primaryDisplay) like '%givlaari%'
or lower(drugcode.standard.primaryDisplay) like '%givosiran%'
or (drugcode.standard.CodingSystemid ='2.16.840.1.113883.6.314'
and lower(drugcode.standard.id) in ('d09456')) then 1 else 0 end) as giv_ind


FROM medication
WHERE
lower(status.standard.primaryDisplay) not in ('cancelled','canceled','error entry deleted')

GROUP BY
tenant, 
personid, 
medicationid, 
encounterid,
drugcode.standard.codingSystemId,                                                        
drugcode.standard.id,
drugcode.standard.primaryDisplay

at org.apache.spark.sql.catalyst.parser.ParseException.withCommand(ParseDriver.scala:241)
at org.apache.spark.sql.catalyst.parser.AbstractSqlParser.parse(ParseDriver.scala:117)
at org.apache.spark.sql.execution.SparkSqlParser.parse(SparkSqlParser.scala:48)
at org.apache.spark.sql.catalyst.parser.AbstractSqlParser.parsePlan(ParseDriver.scala:69)
at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:642)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.lang.Thread.run(Thread.java:748)

During handling of the above exception, another exception occurred:
ParseException                            Traceback (most recent call last)
<ipython-input-14-8f5dff49e939> in <module>
39     drugcode.standard.id,
40     drugcode.standard.primaryDisplay
---> 41     """)
/usr/local/spark/python/pyspark/sql/session.py in sql(self, sqlQuery)
765         [Row(f1=1, f2=u'row1'), Row(f1=2, f2=u'row2'), Row(f1=3, f2=u'row3')]
766         """
--> 767         return DataFrame(self._jsparkSession.sql(sqlQuery), self._wrapped)
768 
769     @since(2.0)
/usr/local/spark/python/lib/py4j-0.10.7-src.zip/py4j/java_gateway.py in __call__(self, *args)
1255         answer = self.gateway_client.send_command(command)
1256         return_value = get_return_value(
-> 1257             answer, self.gateway_client, self.target_id, self.name)
1258 
1259         for temp_arg in temp_args:
/usr/local/spark/python/pyspark/sql/utils.py in deco(*a, **kw)
71                 raise AnalysisException(s.split(': ', 1)[1], stackTrace)
72             if s.startswith('org.apache.spark.sql.catalyst.parser.ParseException: '):
---> 73                 raise ParseException(s.split(': ', 1)[1], stackTrace)
74             if s.startswith('org.apache.spark.sql.streaming.StreamingQueryException: '):
75                 raise StreamingQueryException(s.split(': ', 1)[1], stackTrace)
ParseException: "nmismatched input 'when' expecting {')', ','}(line 10, pos 9)nn== SQL ==nnselectntenant as med_tenant, npersonid as med_personid, nmedicationid as med_medicationid, nencounterid as med_encounterid,ndrugcode.standard.codingSystemId as codingSystemId,ndrugcode.standard.id as drugCode,ndrugcode.standard.primaryDisplay as drugDesc,nmax(case when lower(drugcode.standard.primaryDisplay) like '%onpattro%'n---------^^^n    or lower(drugcode.standard.primaryDisplay) like '%patisiran%'n    or (drugcode.standard.CodingSystemid ='2.16.840.1.113883.6.314'n    and lower(drugcode.standard.id) in ('d08858'))n    or (drugcode.standard.CodingSystemid ='2.16.840.1.113883.6.312'n    and lower(drugcode.standard.id) in ('287393', '287396', '287397,'287399','308546'))n     or (drugcode.standard.CodingSystemid ='2.16.840.1.113883.6.69'n    and lower(drugcode.standard.id) in ('71336100001'))n    or (drugcode.standard.CodingSystemid ='2.16.840.1.113883.6.88'n    and lower(drugcode.standard.id) in ('2053490','2053521','2053522','2053523','2053524','2053525','2053526','2053527','2053528','2053529','2053530','2053531','2284712','2284714'))n       or (drugcode.standard.CodingSystemid ='2.16.840.1.113883.6.96'n    and lower(drugcode.standard.id) in ('37498111000001109','773605008','773608005')) then 1 else 0 end) as on_ind,nmax(case whenn    lower(drugcode.standard.primaryDisplay) like '%givlaari%'n    or lower(drugcode.standard.primaryDisplay) like '%givosiran%'n    or (drugcode.standard.CodingSystemid ='2.16.840.1.113883.6.314'n    and lower(drugcode.standard.id) in ('d09456')) then 1 else 0 end) as giv_indn    n    nFROM medicationnWHEREn    lower(status.standard.primaryDisplay) not in ('cancelled','canceled','error entry deleted')n    nGROUP BYn    tenant, n    personid, n    medicationid, n    encounterid,n    drugcode.standard.codingSystemId,                                                        n ` drugcode.standard.id,n    drugcode.standard.primaryDisplayn    n"'

但当我运行区块时:

`max(case when
lower(drugcode.standard.primaryDisplay) like '%givlaari%'
or lower(drugcode.standard.primaryDisplay) like '%givosiran%'
or (drugcode.standard.CodingSystemid ='2.16.840.1.113883.6.314'
and lower(drugcode.standard.id) in ('d09456')) then 1 else 0 end) as giv_ind`

这样就行了。对我来说,这两个语句在语法上似乎完全相同,那么为什么第一个语句会抛出错误呢?我尝试过以多种方式更改代码中各个元素的位置,但都没有成功。即使删除case语句,也只会将错误的位置更改为"lower"命令。

我很感激在这方面的任何帮助,因为这对我来说是一个主要的障碍。​

我认为在发布的代码中,您在'287397'处缺少一个引号',请尝试添加单引号并再次运行查询。

max(case when 
lower(drugcode.standard.primaryDisplay) like '%onpattro%'
or lower(drugcode.standard.primaryDisplay) like '%patisiran%'
or (drugcode.standard.CodingSystemid ='2.16.840.1.113883.6.314'
and lower(drugcode.standard.id) in ('d08858'))
or (drugcode.standard.CodingSystemid ='2.16.840.1.113883.6.312'
and lower(drugcode.standard.id) in ('287393', '287396', '287397','287399','308546'))
or (drugcode.standard.CodingSystemid ='2.16.840.1.113883.6.69'
and lower(drugcode.standard.id) in ('71336100001'))
or (drugcode.standard.CodingSystemid ='2.16.840.1.113883.6.88'
and lower(drugcode.standard.id) in ('2053490','2053521','2053522','2053523','2053524','2053525','2053526','2053527','2053528','2053529','2053530','2053531','2284712','2284714'))
or (drugcode.standard.CodingSystemid ='2.16.840.1.113883.6.96'
and lower(drugcode.standard.id) in ('37498111000001109','773605008','773608005')) then 1 else 0 end) as on_ind,

我建议您格式化表达式。

max(expr('''
CASE WHEN 
lower(drugcode.standard.primaryDisplay) like '%onpattro%' or
lower(drugcode.standard.primaryDisplay) like '%patisiran%' or
(
drugcode.standard.CodingSystemid ='2.16.840.1.113883.6.314' and 
lower(drugcode.standard.id) in ('d08858')
) or 
(
drugcode.standard.CodingSystemid ='2.16.840.1.113883.6.312' and 
lower(drugcode.standard.id) in ('287393', '287396', '287397', '287399', '308546')
) or 
(
drugcode.standard.CodingSystemid ='2.16.840.1.113883.6.69' and 
lower(drugcode.standard.id) in ('71336100001')
) or 
(
drugcode.standard.CodingSystemid ='2.16.840.1.113883.6.88' and 
lower(drugcode.standard.id) in ('2053490', '2053521', '2053522', '2053523',' 2053524', '2053525', '2053526', '2053527', '2053528', '2053529', '2053530', '2053531', '2284712', '2284714')
) or 
(
drugcode.standard.CodingSystemid ='2.16.840.1.113883.6.96' and 
lower(drugcode.standard.id) in ('37498111000001109', '773605008', '773608005')
) 
THEN 1 
ELSE 0 
END) as on_ind
'''))

最新更新