当我在jupyter Pyspark环境中运行以下代码时:
max(case when
lower(drugcode.standard.primaryDisplay) like '%onpattro%'
or lower(drugcode.standard.primaryDisplay) like '%patisiran%'
or (drugcode.standard.CodingSystemid ='2.16.840.1.113883.6.314'
and lower(drugcode.standard.id) in ('d08858'))
or (drugcode.standard.CodingSystemid ='2.16.840.1.113883.6.312'
and lower(drugcode.standard.id) in ('287393', '287396', '287397,'287399','308546'))
or (drugcode.standard.CodingSystemid ='2.16.840.1.113883.6.69'
and lower(drugcode.standard.id) in ('71336100001'))
or (drugcode.standard.CodingSystemid ='2.16.840.1.113883.6.88'
and lower(drugcode.standard.id) in ('2053490','2053521','2053522','2053523','2053524','2053525','2053526','2053527','2053528','2053529','2053530','2053531','2284712','2284714'))
or (drugcode.standard.CodingSystemid ='2.16.840.1.113883.6.96'
and lower(drugcode.standard.id) in ('37498111000001109','773605008','773608005')) then 1 else 0 end) as on_ind,
我得到以下错误:
Py4JJavaError: An error occurred while calling o42.sql.
: org.apache.spark.sql.catalyst.parser.ParseException:
mismatched input 'when' expecting {')', ','}(line 10, pos 9)
`== SQL ==
'select
tenant as med_tenant,
personid as med_personid,
medicationid as med_medicationid,
encounterid as med_encounterid,
drugcode.standard.codingSystemId as codingSystemId,
drugcode.standard.id as drugCode,
drugcode.standard.primaryDisplay as drugDesc,
max(case when lower(drugcode.standard.primaryDisplay) like '%onpattro%'
---------^^^
or lower(drugcode.standard.primaryDisplay) like '%patisiran%'
or (drugcode.standard.CodingSystemid ='2.16.840.1.113883.6.314'
and lower(drugcode.standard.id) in ('d08858'))
or (drugcode.standard.CodingSystemid ='2.16.840.1.113883.6.312'
and lower(drugcode.standard.id) in ('287393', '287396', '287397,'287399','308546'))
or (drugcode.standard.CodingSystemid ='2.16.840.1.113883.6.69'
and lower(drugcode.standard.id) in ('71336100001'))
or (drugcode.standard.CodingSystemid ='2.16.840.1.113883.6.88'
and lower(drugcode.standard.id) in ('2053490','2053521','2053522','2053523','2053524','2053525','2053526','2053527','2053528','2053529','2053530','2053531','2284712','2284714'))
or (drugcode.standard.CodingSystemid ='2.16.840.1.113883.6.96'
and lower(drugcode.standard.id) in ('37498111000001109','773605008','773608005')) then 1 else 0 end) as on_ind,
max(case when
lower(drugcode.standard.primaryDisplay) like '%givlaari%'
or lower(drugcode.standard.primaryDisplay) like '%givosiran%'
or (drugcode.standard.CodingSystemid ='2.16.840.1.113883.6.314'
and lower(drugcode.standard.id) in ('d09456')) then 1 else 0 end) as giv_ind
FROM medication
WHERE
lower(status.standard.primaryDisplay) not in ('cancelled','canceled','error entry deleted')
GROUP BY
tenant,
personid,
medicationid,
encounterid,
drugcode.standard.codingSystemId,
drugcode.standard.id,
drugcode.standard.primaryDisplay
at org.apache.spark.sql.catalyst.parser.ParseException.withCommand(ParseDriver.scala:241)
at org.apache.spark.sql.catalyst.parser.AbstractSqlParser.parse(ParseDriver.scala:117)
at org.apache.spark.sql.execution.SparkSqlParser.parse(SparkSqlParser.scala:48)
at org.apache.spark.sql.catalyst.parser.AbstractSqlParser.parsePlan(ParseDriver.scala:69)
at org.apache.spark.sql.SparkSession.sql(SparkSession.scala:642)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
at py4j.Gateway.invoke(Gateway.java:282)
at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
at py4j.commands.CallCommand.execute(CallCommand.java:79)
at py4j.GatewayConnection.run(GatewayConnection.java:238)
at java.lang.Thread.run(Thread.java:748)
During handling of the above exception, another exception occurred:
ParseException Traceback (most recent call last)
<ipython-input-14-8f5dff49e939> in <module>
39 drugcode.standard.id,
40 drugcode.standard.primaryDisplay
---> 41 """)
/usr/local/spark/python/pyspark/sql/session.py in sql(self, sqlQuery)
765 [Row(f1=1, f2=u'row1'), Row(f1=2, f2=u'row2'), Row(f1=3, f2=u'row3')]
766 """
--> 767 return DataFrame(self._jsparkSession.sql(sqlQuery), self._wrapped)
768
769 @since(2.0)
/usr/local/spark/python/lib/py4j-0.10.7-src.zip/py4j/java_gateway.py in __call__(self, *args)
1255 answer = self.gateway_client.send_command(command)
1256 return_value = get_return_value(
-> 1257 answer, self.gateway_client, self.target_id, self.name)
1258
1259 for temp_arg in temp_args:
/usr/local/spark/python/pyspark/sql/utils.py in deco(*a, **kw)
71 raise AnalysisException(s.split(': ', 1)[1], stackTrace)
72 if s.startswith('org.apache.spark.sql.catalyst.parser.ParseException: '):
---> 73 raise ParseException(s.split(': ', 1)[1], stackTrace)
74 if s.startswith('org.apache.spark.sql.streaming.StreamingQueryException: '):
75 raise StreamingQueryException(s.split(': ', 1)[1], stackTrace)
ParseException: "nmismatched input 'when' expecting {')', ','}(line 10, pos 9)nn== SQL ==nnselectntenant as med_tenant, npersonid as med_personid, nmedicationid as med_medicationid, nencounterid as med_encounterid,ndrugcode.standard.codingSystemId as codingSystemId,ndrugcode.standard.id as drugCode,ndrugcode.standard.primaryDisplay as drugDesc,nmax(case when lower(drugcode.standard.primaryDisplay) like '%onpattro%'n---------^^^n or lower(drugcode.standard.primaryDisplay) like '%patisiran%'n or (drugcode.standard.CodingSystemid ='2.16.840.1.113883.6.314'n and lower(drugcode.standard.id) in ('d08858'))n or (drugcode.standard.CodingSystemid ='2.16.840.1.113883.6.312'n and lower(drugcode.standard.id) in ('287393', '287396', '287397,'287399','308546'))n or (drugcode.standard.CodingSystemid ='2.16.840.1.113883.6.69'n and lower(drugcode.standard.id) in ('71336100001'))n or (drugcode.standard.CodingSystemid ='2.16.840.1.113883.6.88'n and lower(drugcode.standard.id) in ('2053490','2053521','2053522','2053523','2053524','2053525','2053526','2053527','2053528','2053529','2053530','2053531','2284712','2284714'))n or (drugcode.standard.CodingSystemid ='2.16.840.1.113883.6.96'n and lower(drugcode.standard.id) in ('37498111000001109','773605008','773608005')) then 1 else 0 end) as on_ind,nmax(case whenn lower(drugcode.standard.primaryDisplay) like '%givlaari%'n or lower(drugcode.standard.primaryDisplay) like '%givosiran%'n or (drugcode.standard.CodingSystemid ='2.16.840.1.113883.6.314'n and lower(drugcode.standard.id) in ('d09456')) then 1 else 0 end) as giv_indn n nFROM medicationnWHEREn lower(status.standard.primaryDisplay) not in ('cancelled','canceled','error entry deleted')n nGROUP BYn tenant, n personid, n medicationid, n encounterid,n drugcode.standard.codingSystemId, n ` drugcode.standard.id,n drugcode.standard.primaryDisplayn n"'
但当我运行区块时:
`max(case when
lower(drugcode.standard.primaryDisplay) like '%givlaari%'
or lower(drugcode.standard.primaryDisplay) like '%givosiran%'
or (drugcode.standard.CodingSystemid ='2.16.840.1.113883.6.314'
and lower(drugcode.standard.id) in ('d09456')) then 1 else 0 end) as giv_ind`
这样就行了。对我来说,这两个语句在语法上似乎完全相同,那么为什么第一个语句会抛出错误呢?我尝试过以多种方式更改代码中各个元素的位置,但都没有成功。即使删除case语句,也只会将错误的位置更改为"lower"命令。
我很感激在这方面的任何帮助,因为这对我来说是一个主要的障碍。
我认为在发布的代码中,您在'287397'
处缺少一个单引号'
,请尝试添加单引号并再次运行查询。
max(case when
lower(drugcode.standard.primaryDisplay) like '%onpattro%'
or lower(drugcode.standard.primaryDisplay) like '%patisiran%'
or (drugcode.standard.CodingSystemid ='2.16.840.1.113883.6.314'
and lower(drugcode.standard.id) in ('d08858'))
or (drugcode.standard.CodingSystemid ='2.16.840.1.113883.6.312'
and lower(drugcode.standard.id) in ('287393', '287396', '287397','287399','308546'))
or (drugcode.standard.CodingSystemid ='2.16.840.1.113883.6.69'
and lower(drugcode.standard.id) in ('71336100001'))
or (drugcode.standard.CodingSystemid ='2.16.840.1.113883.6.88'
and lower(drugcode.standard.id) in ('2053490','2053521','2053522','2053523','2053524','2053525','2053526','2053527','2053528','2053529','2053530','2053531','2284712','2284714'))
or (drugcode.standard.CodingSystemid ='2.16.840.1.113883.6.96'
and lower(drugcode.standard.id) in ('37498111000001109','773605008','773608005')) then 1 else 0 end) as on_ind,
我建议您格式化表达式。
max(expr('''
CASE WHEN
lower(drugcode.standard.primaryDisplay) like '%onpattro%' or
lower(drugcode.standard.primaryDisplay) like '%patisiran%' or
(
drugcode.standard.CodingSystemid ='2.16.840.1.113883.6.314' and
lower(drugcode.standard.id) in ('d08858')
) or
(
drugcode.standard.CodingSystemid ='2.16.840.1.113883.6.312' and
lower(drugcode.standard.id) in ('287393', '287396', '287397', '287399', '308546')
) or
(
drugcode.standard.CodingSystemid ='2.16.840.1.113883.6.69' and
lower(drugcode.standard.id) in ('71336100001')
) or
(
drugcode.standard.CodingSystemid ='2.16.840.1.113883.6.88' and
lower(drugcode.standard.id) in ('2053490', '2053521', '2053522', '2053523',' 2053524', '2053525', '2053526', '2053527', '2053528', '2053529', '2053530', '2053531', '2284712', '2284714')
) or
(
drugcode.standard.CodingSystemid ='2.16.840.1.113883.6.96' and
lower(drugcode.standard.id) in ('37498111000001109', '773605008', '773608005')
)
THEN 1
ELSE 0
END) as on_ind
'''))