Intellexer Summarizer can be easily integrated into custom document and knowledge management systems using the C/C++ and C# programming languages. Our SDK contains all the necessary include files and import libraries for binding user applications to the Summarizer module.
Here is a C++ example of how to create a summary and extract a list of the most significant concepts from a sample document:
#include <cstring>
#include <iostream>
#include <string>
#include <SumCore.h>
#include <LPXml.h>
using std::cout;
using std::cerr;
using std::endl;
using std::string;
using namespace NsSemSDK;
/// Print a tree branch: the text of the given node followed, recursively,
/// by the text of all of its descendants.
///
/// A node whose text is empty prints nothing itself, but its children are
/// still visited.
///
/// @param pNode  Node at which printing starts.
/// @param nLevel Depth of pNode; its line is prefixed with (nLevel - 1) tabs,
///               so levels 0 and 1 are both printed without indentation.
void PrintTree(ISumTreeNode* pNode, int nLevel)
{
    const char* pszNodeText = pNode->GetText();

    // Only nodes that actually carry text produce an output line.
    if (pszNodeText[0] != '\0')
    {
        for (int nTabsLeft = nLevel - 1; nTabsLeft > 0; --nTabsLeft)
        {
            cout << '\t';
        }
        cout << pszNodeText << endl;
    }

    // Descend into every child, one level deeper.
    int nChild = 0;
    while (nChild < pNode->GetChildCount())
    {
        PrintTree(pNode->GetChild(nChild), nLevel + 1);
        ++nChild;
    }
}
/// Print the most significant concepts, the most informative sentences and
/// the top part of the concept tree of a summary.
///
/// Note that this function modifies the state of the summary it is given:
/// it changes the current item type, applies size restrictions and changes
/// the view settings of the summary tree.
///
/// @param pSummary Summary to print.
void PrintSummary(ISummary* pSummary)
{
    // --- Concepts -----------------------------------------------------------
    // Select the item type used for the concept list and ask for 5 items;
    // the value returned by RestrictSummary() is used as the item count.
    pSummary->SetCurrentItem(ESumItemTypeRelation);
    const int nConceptCount = pSummary->RestrictSummary(ESumRestrictionItem, 5);

    // Print each concept together with its weight.
    cout << "List of most informative concepts:" << endl;
    int nConcept = 0;
    while (nConcept < nConceptCount)
    {
        ISumItem* pConcept = pSummary->GetItem(nConcept);
        cout << pConcept->GetWeight();
        cout << '\t';
        cout << pConcept->GetText() << endl;
        ++nConcept;
    }
    cout << endl;

    // --- Sentences ----------------------------------------------------------
    // Switch the summary to the sentence list and report how many sentences
    // the document has in total.
    pSummary->SetCurrentItem(ESumItemTypeSentence);
    cout << "Total number of sentences: " << pSummary->GetTotalItemCount() << endl;

    // Restrict the summary to 3 percent; here the resulting size is read back
    // through GetSummarySize() rather than from the return value.
    pSummary->RestrictSummary(ESumRestrictionPercent, 3);
    const int nSentenceCount = pSummary->GetSummarySize();

    // Print each sentence together with its rank.
    cout << "List of most informative sentences:" << endl;
    int nSentence = 0;
    while (nSentence < nSentenceCount)
    {
        ISumItem* pSentence = pSummary->GetItem(nSentence);
        cout << pSentence->GetRank();
        cout << '\t';
        cout << pSentence->GetText() << endl;
        ++nSentence;
    }
    cout << endl;

    // --- Concept tree -------------------------------------------------------
    ISumTree* pConceptTree = pSummary->GetTree();
    // Make the text of a concept include the text of its parent tree nodes.
    pConceptTree->SetFullText(true);
    // Show at most 3 nodes on each tree level.
    pConceptTree->SetChildViewBound(3);

    cout << "Document concept tree (top part):" << endl;
    PrintTree(pConceptTree->GetTreeRoot(), 0);
    cout << endl;
}
/// Find a given concept in a tree branch.
///
/// The branch is searched depth-first: the node itself is tested before its
/// children, and children are tried in index order. A node matches when its
/// text is exactly equal to pszConcept (strcmp comparison).
///
/// @param pNode      Root of the branch to search.
/// @param pszConcept Concept text to look for.
/// @return The first matching node, or NULL if the branch contains no match.
ISumTreeNode* FindConcept(ISumTreeNode* pNode, const char* pszConcept)
{
    if (!strcmp(pNode->GetText(), pszConcept))
    {
        return pNode;
    }

    // Try each child subtree in turn and stop as soon as one yields a match.
    ISumTreeNode* pMatch = NULL;
    for (int nChild = 0; pMatch == NULL && nChild < pNode->GetChildCount(); ++nChild)
    {
        pMatch = FindConcept(pNode->GetChild(nChild), pszConcept);
    }
    return pMatch;
}
int main(int argc, char* argv[])
{
string sFileName("../Data/ForSummarizer.htm"); // path to source document
if (argc > 1)
{
sFileName = argv[1]; // path to source document
}
try
{
string sDBPath("../../LDB"); //path to ldb
string sLPluginsPath("../../LPlugins"); //path to plugins
string sConceptPos("company");
string sConceptQuery("ingredient");
if (argc == 4)
{
sDBPath = argv[2];
sLPluginsPath = argv[3];
}
// provide path to license file
SetSumLicensePath("../../ISDK_License.xml");
SetLPXMLLicensePath("../../ISDK_License.xml");
cerr << "Initializing\t...";
// create summarizer database interface
CInterfacePtr<ISumDB> pSummarizerDB(CreateSummarizerDB());
// create summarizer interface
CInterfacePtr<ISummarizer> pSummarizer(CreateSummarizer());
ISummary* pSummary = NULL;
//initialize summarizer database interface
pSummarizerDB->Setup(sDBPath.c_str(), sLPluginsPath.c_str());
//initialize summarizer interface
pSummarizer->Setup(pSummarizerDB.Get());
cerr << "Done" << endl;
// summarize source document
pSummarizer->Summarize(sFileName.c_str());
//get summary
pSummary = pSummarizer->GetSummary();
cout << "Current summary\n" << endl;
PrintSummary(pSummary);
// Set big limit on tree to be able to find concept in it
pSummary->GetTree()->SetChildViewBound(1000);
cout << "Positions and precise wording for concept \'" << sConceptPos << "\'\n" << endl;
ISumTreeNode* pConcept = FindConcept(pSummary->GetTree()->GetTreeRoot(), sConceptPos.c_str());
if (pConcept != NULL)
{
// Get and print array of document phrases containing given concept
int i;
CInterfacePtr<ISumPhraseContainer> pPhrases(pConcept->GetPhrases(pSummary));
cout << "Concept phrases: ";
for (i = 0; i < pPhrases->GetCount(); ++i)
cout << pPhrases->GetItem(i) << ", ";
cout << endl;
// Get and print array of item describing position of given concept in current summary
const ISumLocation* pLocation;
CInterfacePtr<ISumLocationContainerRef> pLocations(pConcept->GetLocations(pSummary));
cout << "Concept locations:" << endl;
pLocations->Reset();
while (pLocations->Next(pLocation))
cout << "\tsentence " << pLocation->GetIndex() << " offset ("
<< pLocation->GetStartOffset() << ", " << pLocation->GetEndOffset() << ")" << endl;
cout << endl;
}
cout << "Reorder summary with query: " << sConceptQuery << "\n\n";
pConcept = FindConcept(pSummary->GetTree()->GetTreeRoot(), sConceptQuery.c_str());
if (pConcept != NULL)
{
// mark found concept and all its subconcepts as selected
pConcept->SetStatus(ESumItStSelected, true);
pSummary->Reorder(ESumOrMoQuery);
PrintSummary(pSummary);
}
}
catch (const CSemBaseException& x)
{
// Handle exceptions.
cerr << x.what();
}
return 0;
}
As a result, you get all of the Intellexer Summarizer output: the document summary and a hierarchical tree of the most informative concepts.