Thursday, March 28, 2013

IBM BigData approach: BigInsights

Hadoop and Big Data have been two tremendously hot topics lately.


Although many people want to dig into Hadoop and enjoy the benefits of Big Data, most of them don't know exactly how to do it or where to start. This is where BigInsights is most beneficial.

BigInsights is IBM's Apache Hadoop-based software, and its many built-in features and capabilities give you a head start.

First, besides having all Hadoop ecosystem components (Hadoop, HBase, Hive, Pig, Oozie, ZooKeeper, Flume, Avro and Lucene) already working together and tested, it has a very easy-to-use install utility.

If you have ever downloaded and installed Hadoop and all its components, and tried to make sure everything was working, you know how much time an automatic installer can save.


The principal value brought by BigInsights is, in my opinion, the friendly web interface to the Hadoop tools. You don't have to program in "vim" or create MapReduce Java applications. You can use web tools, in a spreadsheet-like utility, to run queries on your data.
You can import and export data to your cluster through the web-interface, and manage it too.







I wrote a book about BigInsights, describing what it is, how to install it and how to use it.
You can find it here:
http://www.redbooks.ibm.com/Redbooks.nsf/RedpieceAbstracts/sg248077.html?Open

You can download the free version here.

Wednesday, March 20, 2013

Dummy Mahout Recommender System Example

I already talked about the Open Source Apache Mahout here, and now I'll show a dummy first example of how to use its recommender system.

It is a basic Java example that I used to try out Mahout. Hope it helps people starting to work with it.


 

package myexample;

import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.impl.model.XmlFile;
import org.apache.mahout.cf.taste.impl.recommender.CachingRecommender;
import org.apache.mahout.cf.taste.impl.recommender.GenericItemBasedRecommender;
import org.apache.mahout.cf.taste.impl.similarity.LogLikelihoodSimilarity;
import org.apache.mahout.cf.taste.impl.recommender.slopeone.SlopeOneRecommender;
import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity;
import org.apache.mahout.cf.taste.similarity.ItemSimilarity;
import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
import org.apache.mahout.cf.taste.similarity.UserSimilarity;
import org.apache.mahout.cf.taste.recommender.Recommender;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;
import org.apache.mahout.cf.taste.recommender.ItemBasedRecommender;
import org.apache.mahout.cf.taste.impl.neighborhood.NearestNUserNeighborhood;
import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender;
import org.apache.mahout.cf.taste.impl.similarity.AveragingPreferenceInferrer;

import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.List;

/*
 * Renata Ghisloti - Dummy Mahout Example
 */


public class GeneralRecommender {

  public static void main(String[] args) throws IOException, TasteException, SAXException, ParserConfigurationException {

    String recsFile = args[0];
    long userId = Long.parseLong(args[1]);
    String categoriesFile = args[2];
    String outputPlace = args[3];
    Integer neighborhoodSize = Integer.parseInt(args[4]);
    Integer method = 0;
    String version = null;

    if(args.length >= 6 )
    {
        method  = Integer.parseInt(args[5]);
        version = args[6];
    }

    //Default - needed to initiate the recommendation
    InputSource is = new InputSource(new FileInputStream(recsFile));
    SAXParserFactory factory = SAXParserFactory.newInstance();
    factory.setValidating(false);
    SAXParser sp = factory.newSAXParser();
    ContentHandler handler = new ContentHandler();
    sp.parse(is, handler);

    //Here is were you should load your own input
    XmlFile dataModel = new XmlFile(new File(recsFile));

    switch(method){
      case 0:
       recommenderItemBased(dataModel, userId , categoriesFile, outputPlace, handler, version);
       break;
      case 1:
       recommenderItemBased(dataModel, userId , categoriesFile, outputPlace, handler, version);
       break;
      case 2:
       recommenderSlopeOne(dataModel, userId , categoriesFile, outputPlace, handler);
       break;
      case 3:
       recommenderUserBased(dataModel, userId , categoriesFile, outputPlace, handler, neighborhoodSize, version);
       break;
    }
  }

    //Item Based Recommender System
    public static void recommenderItemBased(XmlFile dataModel, long userId ,
        String categoriesFile, String outputPlace, ContentHandler handler, String version) throws  TasteException{

        System.out.println("Recommending with Item Based");
        ItemSimilarity itemSimilarity;

        if(version == "LogLikelihoodSimilarity")
            itemSimilarity = new LogLikelihoodSimilarity(dataModel);
        else {
            itemSimilarity = new PearsonCorrelationSimilarity(dataModel); 
            System.out.println("Recommending with Item Based Pearson");
        }
        ItemBasedRecommender recommender =
            new GenericItemBasedRecommender(dataModel, itemSimilarity);

        //Just get top 5 recommendations
        List recommendations =
            recommender.recommend(userId, 5);

        //This is were you should add your own print output method
        PrintXml.printRecs(dataModel, userId, recommendations, handler.map, categoriesFile, outputPlace);
    }


    //Slope One Recommender System
    public static void recommenderSlopeOne(XmlFile dataModel, long userId ,
        String categoriesFile, String outputPlace, ContentHandler handler) throws  TasteException{

        System.out.println("Recommending with Slope One");

        CachingRecommender cachingRecommender = new CachingRecommender(new SlopeOneRecommender(dataModel));

        List recommendations =
            cachingRecommender.recommend(userId, 5);

        PrintXml.printRecs(dataModel, userId, recommendations, handler.map, categoriesFile, outputPlace);
    }


    //User based Recommender System
    public static void recommenderUserBased(XmlFile dataModel, long userId ,
        String categoriesFile, String outputPlace, ContentHandler handler, Integer neighborhoodSize, String version) throws  TasteException{

        System.out.println("Recommending with User Based");
        UserSimilarity userSimilarity;

        if(version == "LogLikelihoodSimilarity")
            userSimilarity = new LogLikelihoodSimilarity(dataModel);
        else
            userSimilarity = new PearsonCorrelationSimilarity(dataModel);

        userSimilarity.setPreferenceInferrer(new AveragingPreferenceInferrer(dataModel));

        UserNeighborhood neighborhood =
            new NearestNUserNeighborhood(neighborhoodSize, userSimilarity, dataModel);

        Recommender recommender =
            new GenericUserBasedRecommender(dataModel, neighborhood, userSimilarity);

        List recommendations =
            recommender.recommend(userId, 5);

    PrintXml.printRecs(dataModel, userId, recommendations, handler.map, categoriesFile, outputPlace);
    }
}