Table of Contents
API Sample Code
Set Up API Profile
The following source code example illustrates how to set up the API profile and use it to instantiate the API's EightyLegsConnector object:
// Create an APIProfile and set the following properties:
// 1. Version - Set to "1.0".
// 2. ApiAuthToken - Set this to the API token you obtained from the web portal.
APIProfile profile = new APIProfile();
profile.Version = "1.0";
profile.ApiAuthToken = "[Your API Token]";
// Create the EightyLegsConnector, passing the APIProfile as the constructor argument.
// The connector is held through the IEightyLegsConnector interface.
IEightyLegsConnector connector = new EightyLegsConnector (profile);
The following source code example illustrates how to upload a jar and use it to create a simple job through the 80legs API:
using System;
using EightyLegs.Domain.Job;
using System.IO;
using EightyLegs.Customer.API;
using EightyLegs.Customer;
using EightyLegs.Domain.Users;
using EightyLegs.Domain.Enumerations;
using EightyLegs.Utilities;
namespace WindowsFormsApplication2
{
    /// <summary>
    /// Sample that uploads an analysis jar and creates a non-repeating job which uses it.
    /// </summary>
    public class TestUploadCodeCreateAJobAPI
    {
        private IEightyLegsConnector connector;

        /// <summary>
        /// Connects with the API token, uploads the jar, assembles the job definition
        /// and submits it. Any exception is reported on the console by its message.
        /// </summary>
        public void performTests()
        {
            try
            {
                APIProfile apiProfile = new APIProfile();
                apiProfile.Version = "1.0";
                apiProfile.ApiAuthToken = "[Your API Token]";
                connector = new EightyLegsConnector(apiProfile);

                // Upload the analysis code first; the returned id is attached to the job below.
                String jarName = "80legsWebAnalysisCode.jar";
                FileInfo jarFile = new FileInfo("C://test//" + jarName);
                int codeId = connector.UploadCode(jarFile, "Sample Code");

                JobSetting jobSetting = new JobSetting();
                // The Unix timestamp suffix keeps the job name distinct between runs of this sample.
                jobSetting.Name = "Job Testing With Code Upload " + DateConversionUtility.ConvertToUnixTimestamp(DateTime.Now);
                jobSetting.EnvironmentType = EnvironmentType.LIVE;
                jobSetting.FrequencyType = FrequencyType.DOES_NOT_REPEAT;
                // Optional recurrence interval: -1 for a job that does not repeat;
                // for repeating jobs it defaults to 1 and should be >= 1.
                jobSetting.FrequencyInterval = -1;

                jobSetting.CrawlSetting = BuildCrawlSetting();
                jobSetting.AnalysisSetting = BuildAnalysisSetting(codeId);
                jobSetting.ResultSetting = BuildResultSetting();

                // Submit the job through the API.
                int createdJobId = connector.CreateJob(jobSetting, false);
                Console.WriteLine("Created job id: " + createdJobId);
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.Message);
            }
        }

        /// <summary>
        /// Builds the crawl settings: one seed URL, "text" MIME type, same-domain links only.
        /// </summary>
        private static CrawlSetting BuildCrawlSetting()
        {
            CrawlSetting crawl = new CrawlSetting();
            crawl.SeedList.Add("http://dir.yahoo.com");
            crawl.CrawlRegularExpression = "";
            crawl.MimeTypeList.Add("text");
            crawl.OutgoingLinkToCrawl = OutgoingLinkType.LINKS_FROM_SAME_FULLY_QUALIFIED_DOMAIN;
            crawl.IsPreservingQueryStringWhenCrawling = true;
            crawl.MaxNumberOfUrls = 10;
            crawl.MaxNumberOfUrlsPerPage = 1000;
            crawl.DepthLevel = 25;
            crawl.CrawlType = CrawlType.FAST;
            return crawl;
        }

        /// <summary>
        /// Builds the analysis settings that run the uploaded code identified by
        /// <paramref name="uploadedCodeId"/>.
        /// </summary>
        private static AnalysisSetting BuildAnalysisSetting(int uploadedCodeId)
        {
            AnalysisSetting analysis = new AnalysisSetting();
            analysis.AnalysisRegularExpression = "";
            analysis.MimeTypeList.Add("text");
            // Analyse with the code that was just uploaded.
            analysis.AnalysisMethod = AnalysisMethod.CODE;
            analysis.CodeId = uploadedCodeId;
            return analysis;
        }

        /// <summary>
        /// Builds the result settings: code results, crawl URLs included, 100 MB file size limit.
        /// </summary>
        private static ResultSetting BuildResultSetting()
        {
            ResultSetting result = new ResultSetting();
            result.ResultType = ResultType.CODE_RESULTS;
            result.HasCrawlUrlsInResult = true;
            result.MaxResultFileSizeInMB = 100;
            return result;
        }
    }
}
The JobSetting object is a basic job object that has the fields required to create a job.
/// <summary>
/// Builds a sample <c>JobSetting</c> populated with the fields needed to create a job.
/// </summary>
public class JobGenerator
{
    /// <summary>
    /// Creates a non-repeating LIVE job with crawl, analysis and result settings filled in.
    /// </summary>
    /// <returns>The populated job definition.</returns>
    public static JobSetting GenerateJob()
    {
        JobSetting generated = new JobSetting();
        // The Unix timestamp suffix keeps the job name distinct between calls.
        generated.Name = "AVAutoTest" + DateConversionUtility.ConvertToUnixTimestamp(DateTime.Now);
        generated.EnvironmentType = EnvironmentType.LIVE;
        generated.FrequencyType = FrequencyType.DOES_NOT_REPEAT;
        // Optional recurrence interval: -1 for a job that does not repeat;
        // for repeating jobs it defaults to 1 and should be >= 1.
        generated.FrequencyInterval = -1;

        generated.CrawlSetting = BuildCrawlSetting();
        generated.AnalysisSetting = BuildAnalysisSetting();
        generated.ResultSetting = BuildResultSetting();
        return generated;
    }

    /// <summary>
    /// Builds the crawl settings: one inline seed URL, a crawl filter expression,
    /// "text" MIME type, all outgoing links followed.
    /// </summary>
    private static CrawlSetting BuildCrawlSetting()
    {
        CrawlSetting crawl = new CrawlSetting();
        // Seeds are added inline here. To use an already uploaded seed list instead,
        // associate it by id on the crawl setting (presumably via a SeedListId member;
        // confirm the exact name against the API).
        crawl.SeedList.Add("http://dir.yahoo.com");
        crawl.CrawlRegularExpression = "^.+crawlexpression.com.*";
        crawl.MimeTypeList.Add("text");
        crawl.OutgoingLinkToCrawl = OutgoingLinkType.CRAWL_ALL_LINKS;
        crawl.IsPreservingQueryStringWhenCrawling = true;
        crawl.MaxNumberOfUrls = 100;
        crawl.MaxNumberOfUrlsPerPage = 100;
        crawl.DepthLevel = 3;
        crawl.CrawlType = CrawlType.FAST;
        return crawl;
    }

    /// <summary>
    /// Builds the analysis settings using the regular-expression-list method with one entry.
    /// </summary>
    private static AnalysisSetting BuildAnalysisSetting()
    {
        AnalysisSetting analysis = new AnalysisSetting();
        analysis.AnalysisRegularExpression = "^.+yahoo\\.com.*";
        analysis.MimeTypeList.Add("text");
        analysis.AnalysisMethod = AnalysisMethod.REGULAR_EXPRESSION_LIST;
        analysis.AnalysisMethodList.Add("yahoo");
        return analysis;
    }

    /// <summary>
    /// Builds the result settings: count-array results, crawl URLs included, 100 MB file size limit.
    /// </summary>
    private static ResultSetting BuildResultSetting()
    {
        ResultSetting result = new ResultSetting();
        result.ResultType = ResultType.COUNT_ARRAY;
        result.HasCrawlUrlsInResult = true;
        result.MaxResultFileSizeInMB = 100;
        return result;
    }
}
The following source code example illustrates how to retrieve the runs of a job and download the results of each job run through the 80legs API:
using System;
using EightyLegs.Domain.Job;
using EightyLegs.Customer.API;
using EightyLegs.Customer;
using EightyLegs.Domain.Users;
using EightyLegs.Domain.Enumerations;
using System.Collections.Generic;
namespace WindowsFormsApplication2
{
    /// <summary>
    /// Sample that retrieves the runs of an existing job and downloads each run's result files.
    /// </summary>
    public class TestUploadCodeCreateAJobAPI
    {
        // Id of the job whose runs are retrieved; replace with one of your own job ids.
        private const int SampleJobId = 2195;

        // Directory passed to DownloadResults as the download target.
        private const string DownloadDirectory = "C:\\downloadedResult\\";

        private IEightyLegsConnector connector;

        /// <summary>
        /// Connects with the API token, retrieves the runs of the sample job and, for every
        /// run that has results, downloads them and prints each downloaded file name.
        /// A failure while handling one run is reported by message and does not stop the
        /// remaining runs; a failure outside the per-run handling prints the stack trace.
        /// </summary>
        public void performTests()
        {
            try
            {
                APIProfile profile = new APIProfile();
                profile.Version = "1.0";
                profile.ApiAuthToken = "[Your API Token]";
                connector = new EightyLegsConnector(profile);

                IList<JobRun> runs = (IList<JobRun>)connector.RetrieveJobRuns(SampleJobId);
                foreach (JobRun run in runs)
                {
                    try
                    {
                        // Retrieves the run result information (the crawl and analysis files).
                        IList<RunResult> results = (IList<RunResult>)connector.RetrieveRunResultsInfo(run.Id);
                        if (results == null || results.Count == 0)
                        {
                            // Nothing to download for this run.
                            continue;
                        }

                        // DownloadResults takes the whole result list, so it is called once per
                        // run; calling it inside a loop over the results would download the
                        // same batch once per result.
                        IList<String> downloadedFiles = connector.DownloadResults(results, DownloadDirectory);
                        if (downloadedFiles == null)
                        {
                            continue;
                        }

                        // Print each name individually; concatenating the list itself would
                        // print the collection's type name instead of the file names.
                        foreach (String downloadedFile in downloadedFiles)
                        {
                            Console.WriteLine("Downloaded file name: " + downloadedFile);
                        }
                    }
                    catch (Exception e)
                    {
                        Console.WriteLine(e.Message);
                    }
                }
            }
            catch (Exception e)
            {
                Console.WriteLine(e.StackTrace);
            }
        }
    }
}
The following source code example illustrates how to download the results of a job run for crawl packages through the 80legs API:
using System;
using System.Collections;
using System.Collections.Generic;
using EightyLegs.Customer.API;
using EightyLegs.Domain.CrawlPackage;
using EightyLegs.Domain.Enumerations;
using EightyLegs.Domain.Exception;
using EightyLegs.Domain.Job;
using EightyLegs.Domain.Users;
namespace WindowsFormsApplication
{
    /// <summary>
    /// Sample that downloads the result files of every job run belonging to the
    /// crawl packages available to the user.
    /// </summary>
    public class TestCrawlPackage
    {
        IEightyLegsConnector connector;

        /// <summary>
        /// Connects with the API token, then walks crawl packages, their jobs and the
        /// jobs' runs, downloading each run's result files and printing the file names.
        /// An <c>EightyLegsCommonException</c> is reported by printing its stack trace.
        /// </summary>
        public void performTests()
        {
            try
            {
                APIProfile apiProfile = new APIProfile();
                apiProfile.Version = "1.0";
                apiProfile.ApiAuthToken = "[Your API Token]";
                connector = new EightyLegsConnector(apiProfile);

                // All crawl packages the user can access.
                IList<CrawlPackage> packages = connector.RetrieveAvailableCrawlPackagesByUser();
                foreach (CrawlPackage package in packages)
                {
                    // All jobs of this crawl package, regardless of status.
                    IList<JobSummary> jobs = connector.RetrieveJobsForCrawlPackage(JobStatusType.ALL, package.ID);
                    foreach (JobSummary job in jobs)
                    {
                        IList<JobRun> jobRuns = connector.RetrieveJobRunsForCrawlPackage(job.Id, package.ID);
                        foreach (JobRun jobRun in jobRuns)
                        {
                            DownloadRunResults(jobRun, package);
                        }
                    }
                }
            }
            catch (EightyLegsCommonException ex)
            {
                Console.WriteLine(ex.StackTrace);
            }
        }

        /// <summary>
        /// Downloads every result file of <paramref name="run"/> for the given crawl
        /// package and prints the name returned for each download.
        /// </summary>
        private void DownloadRunResults(JobRun run, CrawlPackage package)
        {
            if (run.RunResults.Count <= 0)
            {
                return;
            }

            foreach (RunResult runResult in run.RunResults)
            {
                // Downloads one result file into the target directory.
                String downloadedName = connector.DownloadResultFile(runResult, null, "C:\\etc\\computationalcrawling\\downloadedResult\\", package.ID);
                Console.WriteLine(downloadedName);
            }
        }
    }
}
Extract Data from .80 File
The following source code is an example of how to extract data from a .80 file. The example is courtesy of one of our users, Jeremy.
/// <summary>
/// Reads a .80 result file and returns its entries as text keyed by URL.
/// </summary>
/// <param name="inputFile">Path of the result file handed to <c>CustomerResults.ReadFile</c>.</param>
/// <returns>
/// A <see cref="System.Collections.Hashtable"/> mapping each URL to its payload decoded as an
/// ASCII string. Because the decoding is ASCII, non-ASCII bytes are not preserved.
/// </returns>
public static System.Collections.Hashtable ExtractData(string inputFile)
{
    Dictionary<string, byte[]> rawByUrl =
        EightylegsCustomerResults.CustomerResults.ReadFile(inputFile);

    System.Collections.Hashtable textByUrl = new System.Collections.Hashtable();
    foreach (KeyValuePair<string, byte[]> entry in rawByUrl)
    {
        // Decode the raw bytes of this URL's entry into text.
        textByUrl.Add(entry.Key, System.Text.Encoding.ASCII.GetString(entry.Value));
    }
    return textByUrl;
}
More information on how to get results is available on http://wiki.80legs.com/w/page/1114628/Results.
Comments (2)
M Omar Tariq said
at 1:44 pm on Oct 4, 2009
To Declare the API Token the syntax should be as follows :
profile.ApiAuthToken = "[Your API Token]";
according to DOTNET API version
Aliya said
at 3:10 pm on Feb 12, 2010
Thank you Omar. I have made the changes above.
You don't have permission to comment on this page.