使用 Alamofire 为 JSON 和 HTML 创建通用提取器



我正在尝试启动一个基于网络抓取的项目。我已经针对不同的平台准备好了工具:解析 JSON 时使用 SwiftyJSON,解析原始 HTML 时使用 hpple。我的问题是,我想为内容本身以及内容的提取器分别设置一些通用类,因为每个平台的操作流程都是这样的:

登录:如果需要用户名或密码,则提供;如果有验证码,则显示验证码并使用其结果。使用 Alamofire 获取数据。使用 JSON 或 HTML 解析抓取到的数据。填充内容类。

我想知道是否有办法定义某种协议、枚举或通用模板,以便我可以为每个类定义这些不同的函数。我想如果我不能做到这一点,我会一遍又一遍地编写相同的代码。这就是我想出的。如果您能帮助我正确设置,我将不胜感激。

/// The sites the scraper knows how to talk to.
///
/// Raw values follow declaration order (`CNN` = 0 ... `SO` = 3).
enum Company: Int {
    case CNN
    case BBC
    case HN
    case SO

    /// Whether the site requires a captcha to be solved during login.
    var captcha: Bool {
        // Every case is listed explicitly (no `default`), so adding a new
        // company forces this switch to be revisited. The leading dot keeps
        // the patterns valid on every Swift version.
        switch self {
        case .BBC, .HN:
            return true
        case .CNN, .SO:
            return false
        }
    }

    /// Human-readable name of the site.
    var description: String {
        switch self {
        case .CNN:
            return "CNN"
        case .BBC:
            return "BBC"
        case .HN:
            return "Hacker News"
        case .SO:
            return "Stack Overflow"
        }
    }
}
/// Base class for a per-site fetcher. It stores the target company, the URL
/// and optional credentials; `login`, `fetch` and `populate` are skeletons.
class Fetcher {
    /// Login name; stays `nil` when the credential-less initializer is used.
    var username: String?
    /// Login password; stays `nil` when the credential-less initializer is used.
    var password: String?
    /// Address the fetcher works against.
    var url: String
    /// Site this fetcher targets.
    var company: Company

    /// Creates a fetcher that has no credentials.
    init(company: Company, url: String) {
        self.company = company
        self.url = url
    }

    /// Creates a fetcher that carries a username and password.
    init(company: Company, url: String, username: String, password: String) {
        self.company = company
        self.url = url
        self.password = password
        self.username = username
    }

    /// Placeholder login flow: a credentials step when a username is set,
    /// followed by a captcha step when the company requires one.
    func login() {
        let hasUsername = (username != nil)
        if hasUsername {
            // login
        }

        let needsCaptcha = company.captcha
        if needsCaptcha {
            // show captcha
        }
    }

    /// Placeholder for downloading the data; intentionally empty.
    func fetch() {
    }

    /// Placeholder for filling the content model; intentionally empty.
    func populate() {
    }
}
/// Fetcher subclass for CNN; currently adds nothing on top of `Fetcher`.
class CNN: Fetcher {
}

以下是我使用 Alamofire 和 AlamofireObjectMapper 的做法。步骤 1:创建遵循 Mappable 协议的模型类。

/// Mappable model for a store-listing response.
class StoreListingModal: Mappable {
    /// Filled from the `"result"` key of the response.
    var store: [StoreModal]?
    /// Filled from the `"status"` key of the response.
    var status: String?

    /// Required by `Mappable`; no work is done at construction time.
    required init?(_ map: Map) {
    }

    /// Binds each property to its key in the map.
    func mapping(map: Map) {
        store <- map["result"]
        status <- map["status"]
    }
}

第 2 步:使用泛型类型创建抓取请求:

/// Issues a GET request for `urlString` and hands the response, mapped onto
/// the `Mappable` type `T`, to `completion`.
///
/// - parameter urlString:  Address to request.
/// - parameter completion: Called exactly once per response: with the mapped
///   object and a `nil` error on success, or with `nil` and the error on failure.
func getDataFromNetwork<T: Mappable>(urlString: String, completion: (T?, NSError?) -> Void) {
    Alamofire.request(.GET, urlString).responseObject { (response: Response<T, NSError>) in
        // Switching over the result covers both outcomes, so there is no
        // path on which the caller is left without a callback.
        switch response.result {
        case .Success(let responseObject):
            print(responseObject)
            completion(responseObject, nil)
        case .Failure(let error):
            // Interpolate the error itself; without the backslash the
            // literal text "(response.result.error)" was printed.
            print("Error while fetching: \(error)")
            completion(nil, error)
        }
    }
}

步骤3:现在您只需要调用此获取函数即可。这可以像这样完成:

// Example call: annotating the closure parameter as `StoreListingModal?`
// is what selects the generic type `T` of `getDataFromNetwork`.
self.getDataFromNetwork("your url string") { (userResponse:StoreListingModal?, error) in
    // Handle `userResponse` / `error` here.
    }

这样您不仅会获得响应对象,而且它已经被映射到对应的模型类。

好吧,这是一个有趣的练习...

您实际上只需要进一步构建Company枚举,以使Fetcher更加抽象。这里有一种方法,它只是稍微修改了你自己的方法,应该让你更接近你想要实现的目标。这是基于我以前对你的另一个问题的答复。

公司

/// The sites the fetcher can talk to. Everything site-specific (captcha flag,
/// credentials, login and content requests) lives here so that `Fetcher`
/// itself needs no per-company state.
enum Company: Printable, URLRequestConvertible {
    case CNN, BBC, HN, SO

    /// Whether the site requires a captcha during login.
    var captcha: Bool {
        switch self {
        case .BBC, .HN:
            return true
        case .CNN, .SO:
            return false
        }
    }

    /// Credentials for sites that need them; `nil` for the others.
    var credentials: (username: String, password: String)? {
        // All cases are listed instead of using `default`, so a newly added
        // company cannot silently fall through to "no credentials".
        switch self {
        case .CNN:
            return ("cnn_username", "cnn_password")
        case .HN:
            return ("hn_username", "hn_password")
        case .BBC, .SO:
            return nil
        }
    }

    /// Human-readable name of the site (`Printable`).
    var description: String {
        switch self {
        case .CNN:
            return "CNN"
        case .BBC:
            return "BBC"
        case .HN:
            return "Hacker News"
        case .SO:
            return "Stack Overflow"
        }
    }

    /// Request used to log in to the site.
    var loginURLRequest: NSURLRequest {
        // A non-optional constant assigned once per case replaces the old
        // optional variable and its force-unwrap.
        let URLString: String
        switch self {
        case .CNN:
            URLString = "cnn_login_url"
        case .BBC:
            URLString = "bbc_login_url"
        case .HN:
            URLString = "hn_login_url"
        case .SO:
            URLString = "so_login_url"
        }
        // The strings above are placeholders; a string that NSURL cannot
        // parse would trap on this remaining unwrap.
        return NSURLRequest(URL: NSURL(string: URLString)!)
    }

    /// Request used to fetch the site's content (`URLRequestConvertible`).
    var URLRequest: NSURLRequest {
        let URLString: String
        switch self {
        case .CNN:
            URLString = "cnn_url"
        case .BBC:
            URLString = "bbc_url"
        case .HN:
            URLString = "hn_url"
        case .SO:
            URLString = "so_url"
        }
        // Placeholder strings; see the note in `loginURLRequest`.
        return NSURLRequest(URL: NSURL(string: URLString)!)
    }
}

新闻

/// Immutable value describing one news item. `Fetcher` delivers arrays of
/// these to its success handler.
struct News {
    /// Title of the item.
    let title: String
    /// Body text of the item.
    let content: String
    /// Date attached to the item.
    let date: NSDate
    /// Author of the item.
    let author: String
}

提取器

/// Stateless helper that logs in to a company's site and then fetches its
/// news. All per-site details come from the `Company` value passed in.
class Fetcher {
    /// Called with the fetched news on success.
    typealias FetchNewsSuccessHandler = [News] -> Void
    /// Called with the HTTP response, the raw JSON (if any) and the error on failure.
    typealias FetchNewsFailureHandler = (NSHTTPURLResponse?, AnyObject?, NSError?) -> Void

    // MARK: - Fetch News Methods

    /// Logs in to `company` and, if that succeeds, fetches its news.
    ///
    /// - parameter company: Site to fetch from.
    /// - parameter success: Receives the news list once both steps succeed.
    /// - parameter failure: Receives the details of whichever step failed.
    class func fetchNewsFromCompany(company: Company, success: FetchNewsSuccessHandler, failure: FetchNewsFailureHandler) {
        login(
            company: company,
            success: { apiKey in
                // The handlers are forwarded as-is; wrapping them in extra
                // closures added nothing.
                Fetcher.fetch(company: company, apiKey: apiKey, success: success, failure: failure)
            },
            failure: failure
        )
    }

    // MARK: - Private - Helper Methods

    /// Sends the company's login request, authenticating when the company
    /// supplies credentials, and reports an API key on success.
    private class func login(
        #company: Company,
        success: (String) -> Void,
        failure: FetchNewsFailureHandler)
    {
        if company.captcha {
            // You'll need to figure this part out on your own. First off, I'm not really sure how you
            // would do it, and secondly, I think there may be legal implications of doing this.
        }

        let request = Alamofire.request(company.loginURLRequest)

        if let credentials = company.credentials {
            request.authenticate(username: credentials.username, password: credentials.password)
        }

        request.responseJSON { _, response, json, error in
            if let error = error {
                failure(response, json, error)
            } else {
                // NOTE: You'll need to parse here...I would suggest using SwiftyJSON
                let apiKey = "12345678"
                success(apiKey)
            }
        }
    }

    /// Sends the company's content request and reports the parsed news.
    /// `apiKey` is accepted but not yet used by this placeholder body.
    private class func fetch(
        #company: Company,
        apiKey: String,
        success: FetchNewsSuccessHandler,
        failure: FetchNewsFailureHandler)
    {
        let request = Alamofire.request(company.URLRequest)

        // `response` must be bound here: it is passed to `failure` below, and
        // discarding it with `_` left that reference undefined.
        request.responseJSON { _, response, json, error in
            if let error = error {
                failure(response, json, error)
            } else {
                // NOTE: You'll need to parse here...I would suggest using SwiftyJSON
                let news = [News]()
                success(news)
            }
        }
    }
}

示例视图控制器调用提取程序

/// Example view controller showing how to call `Fetcher`.
class SomeViewController: UIViewController {
    /// Fetches the CNN news when the view loads and prints the outcome.
    override func viewDidLoad() {
        super.viewDidLoad()

        Fetcher.fetchNewsFromCompany(
            Company.CNN,
            success: { newsList in
                for news in newsList {
                    // The backslash is required for interpolation; without
                    // it the literal text "(news.title)" is printed.
                    println("\(news.title) - \(news.date)")
                }
            },
            // The argument label needs its colon: `failure: { ... }`.
            failure: { response, data, error in
                println("\(response) \(error)")
            }
        )
    }
}

通过允许Company对象流经您的Fetcher,您永远不必在 Fetcher 中跟踪公司的状态。它都可以直接存储在枚举中。

希望有帮助。干杯。

最新更新