在Mapreduce程序中使用multiinput会产生一个错误



我在mapreduce程序中使用MultipleInputs,我在Intellij中本地运行它。我得到这个错误:

Exception in thread "main" java.lang.NoClassDefFoundError: org/apache/hadoop/mapreduce/lib/input/MultipleInputs
    at Plink.PlinkDriver.run(PlinkDriver.java:49)
    at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:65)
    at org.apache.hadoop.util.ToolRunner.run(ToolRunner.java:79)
    at Plink.PlinkDriver.main(PlinkDriver.java:71)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:57)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:606)
    at com.intellij.rt.execution.application.AppMain.main(AppMain.java:140)
Caused by: java.lang.ClassNotFoundException: org.apache.hadoop.mapreduce.lib.input.MultipleInputs
    at java.net.URLClassLoader$1.run(URLClassLoader.java:366)
    at java.net.URLClassLoader$1.run(URLClassLoader.java:355)
    at java.security.AccessController.doPrivileged(Native Method)
    at java.net.URLClassLoader.findClass(URLClassLoader.java:354)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:425)
    at sun.misc.Launcher$AppClassLoader.loadClass(Launcher.java:308)
    at java.lang.ClassLoader.loadClass(ClassLoader.java:358)
    ... 9 more
Process finished with exit code 1
驱动类的代码如下:
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.MultipleInputs;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
 * Created by Sai Bharath on 7/15/2015.
 */
public class PlinkDriver extends Configured implements Tool {
    @Override
    public int run(String[] args) throws Exception {

        if (args.length < 3) {
            System.err.printf("Usage: %s [generic options] <input> <output>n",
                    getClass().getSimpleName());
            ToolRunner.printGenericCommandUsage(System.err);
            return -1;
        }
        Job job = new Job();
        job.setJarByClass(PlinkDriver.class);
        MultipleInputs.addInputPath(job, new Path(args[0]),TextInputFormat.class,PlinkMapper.class);
        MultipleInputs.addInputPath(job, new Path(args[1]),TextInputFormat.class,PlinkMapper2.class);
        FileOutputFormat.setOutputPath(job, new Path(args[2]));
        job.setOutputFormatClass(TextOutputFormat.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setReducerClass(PlinkReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

     return job.waitForCompletion(true) ? 0 : 1;
    }
    public static void main(String[] args) throws Exception {
        int exitCode = ToolRunner.run(new PlinkDriver(), args);
        System.exit(exitCode);
    }
}
我使用的pom.xml是:
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>Mapreduce</groupId>
    <artifactId>Mapreduce</artifactId>
    <version>1.0-SNAPSHOT</version>
    <properties>
        <jdkLevel>1.7</jdkLevel>
        <requiredMavenVersion>[3.3,)</requiredMavenVersion>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <project.build.outputEncoding>UTF-8</project.build.outputEncoding>
    </properties>
    <distributionManagement>
        <repository>
            <id>code-artifacts</id>
            <url>
                http://code/artifacts/content/repositories/releases
            </url>
        </repository>
        <snapshotRepository>
            <id>code-artifacts</id>
            <url>
                http://code/artifacts/content/repositories/snapshots
            </url>
        </snapshotRepository>
    </distributionManagement>
    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-surefire-plugin</artifactId>
                <version>2.18.1</version>
                <configuration>
                    <skipTests>true</skipTests>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.3</version>
                <configuration>
                    <source>${jdkLevel}</source>
                    <target>${jdkLevel}</target>
                    <showDeprecation>true</showDeprecation>
                    <showWarnings>true</showWarnings>
                </configuration>
                <dependencies>
                    <dependency>
                        <groupId>org.codehaus.groovy</groupId>
                        <artifactId>groovy-eclipse-compiler</artifactId>
                        <version>2.9.2-01</version>
                    </dependency>
                    <dependency>
                        <groupId>org.codehaus.groovy</groupId>
                        <artifactId>groovy-eclipse-batch</artifactId>
                        <version>2.4.3-01</version>
                    </dependency>
                </dependencies>
            </plugin>
            <plugin>
                <artifactId>maven-dependency-plugin</artifactId>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>copy-dependencies</goal>
                        </goals>
                        <configuration>
                            <outputDirectory>${project.build.directory}/lib</outputDirectory>
                            <includeScope>provided</includeScope>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
    <repositories>
        <repository>
            <releases>
                <enabled>true</enabled>
                <updatePolicy>always</updatePolicy>
                <checksumPolicy>warn</checksumPolicy>
            </releases>
            <snapshots>
                <enabled>false</enabled>
                <updatePolicy>never</updatePolicy>
                <checksumPolicy>fail</checksumPolicy>
            </snapshots>
            <id>HDPReleases</id>
            <name>HDP Releases</name>
            <url>http://repo.hortonworks.com/content/repositories/releases/</url>
            <layout>default</layout>
        </repository>
    </repositories>
    <dependencies>
        <dependency>
            <groupId>commons-logging</groupId>
            <artifactId>commons-logging</artifactId>
            <version>1.2</version>
        </dependency>
        <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-client</artifactId>
        <version>2.6.0</version>
       <scope>provided</scope>
    </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-core</artifactId>
            <version>0.20.2</version>
        </dependency>
        <dependency>
            <groupId>log4j</groupId>
            <artifactId>log4j</artifactId>
            <version>1.2.17</version>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-api</artifactId>
            <version>1.7.5</version>
        </dependency>
        <dependency>
            <groupId>org.testng</groupId>
            <artifactId>testng</artifactId>
            <version>6.8.7</version>
        </dependency>
        <dependency>
            <groupId>org.apache.mrunit</groupId>
            <artifactId>mrunit</artifactId>
            <version>1.0.0</version>
            <classifier>hadoop2</classifier>
        </dependency>
        <dependency>
            <groupId>org.mockito</groupId>
            <artifactId>mockito-core</artifactId>
            <version>1.9.5</version>
            <scope>test</scope>
        </dependency>
        <dependency>
            <groupId>commons-cli</groupId>
            <artifactId>commons-cli</artifactId>
            <version>1.2</version>
        </dependency>
    </dependencies>
</project>
有人能帮我解决这个问题吗?

谢谢。Bharath

MultipleInputs类包含在hadoop-mapreduce-client-core工件中。您的项目有一个provided依赖于hadoop-clienthadoop-client工件是一个薄包装器,它通过传递依赖引入了多个其他Hadoop工件。

https://github.com/apache/hadoop/blob/release-2.6.0/hadoop-client/pom.xml

基于此,我建议做以下修改:

  1. 移除<excludeTransitive>true</excludeTransitive>hadoop-client依赖关系本身只是一个pom.xml,并且需要传递依赖解析才能到达hadoop-mapreduce-client-core和其他Hadoop工件。
  2. <includeScope>runtime</includeScope>更改为<includeScope>provided</includeScope>以匹配<dependency>部分中使用的依赖范围。

maven-dependency-plugin配置正在将依赖项复制到target/lib目录。我假设您的IntelliJ项目已经设置好,可以将这个目录中的所有jar添加到它的类路径中。

相关内容

  • 没有找到相关文章

最新更新