{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from flashtext import KeywordProcessor\n",
    "import pandas as pd\n",
    "from pathlib import Path\n",
    "from collections import defaultdict\n",
    "from IPython.display import display, HTML"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[WindowsPath('../data/v1/WikiCSSH_categories.csv'),\n",
       " WindowsPath('../data/v1/WikiCSSH_category2page.csv'),\n",
       " WindowsPath('../data/v1/WikiCSSH_category_links.csv'),\n",
       " WindowsPath('../data/v1/WikiCSSH_category_links_all.csv'),\n",
       " WindowsPath('../data/v1/Wikicssh_core_categories.csv'),\n",
       " WindowsPath('../data/v1/WikiCSSH_page2redirect.csv')]"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Directory that holds the WikiCSSH v1 CSV files.\n",
    "wikicssh_path = Path(\"../data/v1\")\n",
    "# glob() yields entries in an unspecified order; sort case-insensitively by file name\n",
    "# so that the listing (and any positional lookup into it) is reproducible.\n",
    "wikicssh_files = sorted(wikicssh_path.glob(\"./*.csv\"), key=lambda p: p.name.lower())\n",
    "wikicssh_files"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Wall time: 20.4 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "# Lookup from each page title to the list of category titles recorded for it.\n",
    "page2cats = (\n",
    "    pd.read_csv('../data/v1/WikiCSSH_category2page.csv')\n",
    "    .groupby(\"page_title\")[\"cat_title\"]\n",
    "    .agg(list)\n",
    "    .to_dict()\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>category</th>\n",
       "      <th>level</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>Computer_science</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>Mathematics</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>Information_science</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>Computer_engineering</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>Statistics</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "               category  level\n",
       "0      Computer_science      1\n",
       "1           Mathematics      1\n",
       "2   Information_science      1\n",
       "3  Computer_engineering      1\n",
       "4            Statistics      1"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Preview the core categories. The file is named explicitly because a position in the\n",
    "# globbed `wikicssh_files` list depends on the directory listing order.\n",
    "pd.read_csv(wikicssh_path / \"Wikicssh_core_categories.csv\").head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Single shared keyword matcher; the following cells register category and page keywords on it.\n",
    "processor = KeywordProcessor()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Wall time: 124 ms\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "# categories: each title is matched in lower case with underscores turned into spaces,\n",
    "# and resolves to the clean name \"Category:<title>\".\n",
    "category_titles = pd.read_csv(\"../data/v1/WikiCSSH_categories.csv\")[\"category\"]\n",
    "category_keywords = {}\n",
    "for title in category_titles:\n",
    "    category_keywords[f'Category:{title}'] = [title.lower().replace(\"_\", \" \")]\n",
    "processor.add_keywords_from_dict(category_keywords)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Wall time: 8.44 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "# Register the last column of every row as a keyword that resolves to the first column.\n",
    "for row in pd.read_csv('../data/v1/WikiCSSH_page2redirect.csv').values:\n",
    "    page_title, redirect_title = row[0], row[-1]\n",
    "    # A float in the last column is treated as \"no redirect\" (presumably NaN from an\n",
    "    # empty CSV field - TODO confirm); the first column's value is then the keyword.\n",
    "    surface_form = page_title if isinstance(redirect_title, float) else redirect_title\n",
    "    processor.add_keyword(surface_form.lower().replace(\"_\", \" \"), page_title)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "text = \"\"\"In the last decade, we experienced an urgent need for a flexible, context-sensitive, fine-grained, and machine-actionable representation of scholarly knowledge and corresponding infrastructures for knowledge curation, publishing and processing. Such technical infrastructures are becoming increasingly popular in representing scholarly knowledge as structured, interlinked, and semantically rich Scientific Knowledge Graphs (SKG). Knowledge graphs are large networks of entities and relationships, usually expressed in W3C standards such as OWL and RDF. SKGs focus on the scholarly domain and describe the actors (e.g., authors, organizations), the documents (e.g., publications, patents), and the research knowledge (e.g., research topics, tasks, technologies) in this space as well as their reciprocal relationships. These resources provide substantial benefits to researchers, companies, and policymakers by powering several data-driven services for navigating, analysing, and making sense of research dynamics. Some examples include Microsoft Academic Graph (MAG), Open Academic Graph (combining MAG and AMiner), ScholarlyData, PID Graph, Open Research Knowledge Graph, OpenCitations, and OpenAIRE research graph. Current challenges in this area include: i) the design of ontologies able to conceptualise scholarly knowledge, ii) (semi-)automatic extraction of entities and concepts, integration of information from heterogeneous sources, identification of duplicates, finding connections between entities, and iii) the development of new services using this data, that allow to explore this information, measure research impact and accelerate science. 
This workshop aims at bringing together researchers and practitioners from different fields (including, but not limited to, Digital Libraries, Information Extraction, Machine Learning, Semantic Web, Knowledge Engineering, Natural Language Processing, Scholarly Communication, and Bibliometrics) in order to explore innovative solutions and ideas for the production and consumption of Scientific Knowledge Graphs (SKGs).\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[('Experience', 23, 34),\n",
       " ('Granularity', 85, 97),\n",
       " ('Scholarly_method', 140, 149),\n",
       " ('Knowledge', 150, 159),\n",
       " ('Knowledge', 198, 207),\n",
       " ('Scholarly_method', 326, 335),\n",
       " ('Knowledge', 336, 345),\n",
       " ('Semantics', 378, 390),\n",
       " ('Knowledge', 407, 416),\n",
       " ('Category:Graphs', 417, 423),\n",
       " ('Knowledge', 431, 440),\n",
       " ('Category:Graphs', 441, 447),\n",
       " ('Entity', 470, 478),\n",
       " ('World_Wide_Web_Consortium', 519, 532),\n",
       " ('Scholarly_method', 572, 581),\n",
       " ('Document', 649, 658),\n",
       " ('Research', 698, 706),\n",
       " ('Knowledge', 707, 716),\n",
       " ('Research', 724, 732),\n",
       " ('Category:Space', 770, 775),\n",
       " ('Research', 867, 878),\n",
       " ('Business', 880, 889),\n",
       " ('Research', 996, 1004),\n",
       " ('CONFIG.SYS', 1029, 1036),\n",
       " ('Microsoft_Academic', 1037, 1055),\n",
       " ('Academy_(educational_institution)', 1074, 1082),\n",
       " ('Open_research', 1143, 1156),\n",
       " ('Ontology_(information_science)', 1157, 1172),\n",
       " ('Research', 1202, 1210),\n",
       " ('Category:Area', 1245, 1249),\n",
       " ('CONFIG.SYS', 1250, 1257),\n",
       " ('Category:Design', 1266, 1272),\n",
       " ('Ontology', 1276, 1286),\n",
       " ('Concept', 1295, 1308),\n",
       " ('Scholarly_method', 1309, 1318),\n",
       " ('Knowledge', 1319, 1328),\n",
       " ('2', 1330, 1332),\n",
       " ('Numeral_prefix', 1335, 1340),\n",
       " ('Entity', 1365, 1373),\n",
       " ('Concept', 1378, 1386),\n",
       " ('Category:Information', 1403, 1414),\n",
       " ('Homogeneity_and_heterogeneity', 1420, 1433),\n",
       " ('Category:Identification', 1443, 1457),\n",
       " ('Entity', 1501, 1509),\n",
       " ('Category:Information', 1596, 1607),\n",
       " ('Research', 1617, 1625),\n",
       " ('Acceleration', 1637, 1647),\n",
       " ('Research', 1697, 1708),\n",
       " ('Digital_library', 1781, 1798),\n",
       " ('Information_extraction', 1800, 1822),\n",
       " ('Machine_learning', 1824, 1840),\n",
       " ('Semantic_Web', 1842, 1854),\n",
       " ('Knowledge_engineering', 1856, 1877),\n",
       " ('Natural_language_processing', 1879, 1906),\n",
       " ('Scholarly_communication', 1908, 1931),\n",
       " ('Category:Bibliometrics', 1937, 1950),\n",
       " ('Innovation', 1972, 1982),\n",
       " ('Solution', 1983, 1992),\n",
       " ('Idea', 1997, 2002),\n",
       " ('Category:Consumption', 2026, 2037),\n",
       " ('Knowledge', 2052, 2061),\n",
       " ('Category:Graphs', 2062, 2068)]"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Sanity check: list every match in `text` as (clean name, start offset, end offset).\n",
    "processor.extract_keywords(text, span_info=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_html(text, processor):\n",
    "    \"\"\"Render `text` with each matched keyword linked to Wikipedia, plus a category tally.\n",
    "\n",
    "    Args:\n",
    "        text: Plain-text document to annotate.\n",
    "        processor: Matcher whose `extract_keywords(text, span_info=True)` returns\n",
    "            `(clean_name, start, end)` tuples.\n",
    "\n",
    "    Returns:\n",
    "        An `IPython.display.HTML` object holding the tagged document followed by the\n",
    "        matched categories, most frequent first.\n",
    "\n",
    "    Reads the module-level `page2cats` mapping (page title -> list of categories).\n",
    "    \"\"\"\n",
    "    # Function-scope imports keep this cell runnable without touching the import cell.\n",
    "    from html import escape\n",
    "    from urllib.parse import quote\n",
    "\n",
    "    def wiki_link(page, label, tooltip):\n",
    "        # Percent-encode the URL path and HTML-escape the tooltip and label so quotes,\n",
    "        # ampersands or angle brackets cannot break the generated markup.\n",
    "        href = \"https://en.wikipedia.org/wiki/\" + quote(page, safe=\"/:(),\")\n",
    "        safe_tooltip = escape(tooltip)\n",
    "        safe_label = escape(label, quote=False)\n",
    "        return f\"<a href='{href}' title='{safe_tooltip}'>{safe_label}</a>\"\n",
    "\n",
    "    spans = processor.extract_keywords(text, span_info=True)\n",
    "    prev = 0\n",
    "    parts = []\n",
    "    category_counts = defaultdict(int)\n",
    "    for entity, start, end in spans:\n",
    "        if entity.startswith(\"Category:\"):\n",
    "            # A category match counts towards itself.\n",
    "            entity_cats = [entity.replace(\"Category:\", \"\", 1)]\n",
    "        else:\n",
    "            # A page match counts towards every category listed for that page.\n",
    "            entity_cats = page2cats.get(entity, [])\n",
    "        for cat in entity_cats:\n",
    "            category_counts[cat] += 1\n",
    "        if start > prev:\n",
    "            parts.append(escape(text[prev:start], quote=False))\n",
    "        parts.append(wiki_link(entity, text[start:end], entity))\n",
    "        prev = end\n",
    "    # Append the text after the last match so the end of the document is not lost.\n",
    "    parts.append(escape(text[prev:], quote=False))\n",
    "    tagged_doc = \"\".join(parts).replace(\"\\n\", \"<br/>\")\n",
    "    pred_categories = \" | \".join([\n",
    "        f\"{wiki_link(f'Category:{k}', k, k)} ({v})\"\n",
    "        for k, v in sorted(category_counts.items(), key=lambda x: x[1], reverse=True)\n",
    "    ])\n",
    "    final_div = f\"\"\"<div>\n",
    "    <div>\n",
    "        <h3>Tagged document:</h3>\n",
    "        {tagged_doc}\n",
    "    </div>\n",
    "    <div>\n",
    "        <h3>Predicted categories:</h3>\n",
    "        {pred_categories}\n",
    "    </div>\n",
    "    </div>\"\"\"\n",
    "    return HTML(final_div)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "    <div>\n",
       "        <h3>Tagged document:</h3>\n",
       "        In the last decade, we <a href='https://en.wikipedia.org/wiki/Experience' title='Experience'>experienced</a> an urgent need for a flexible, context-sensitive, <a href='https://en.wikipedia.org/wiki/Granularity' title='Granularity'>fine-grained</a>, and machine-actionable representation of <a href='https://en.wikipedia.org/wiki/Scholarly_method' title='Scholarly_method'>scholarly</a> <a href='https://en.wikipedia.org/wiki/Knowledge' title='Knowledge'>knowledge</a> and corresponding infrastructures for <a href='https://en.wikipedia.org/wiki/Knowledge' title='Knowledge'>knowledge</a> curation, publishing and processing. Such technical infrastructures are becoming increasingly popular in representing <a href='https://en.wikipedia.org/wiki/Scholarly_method' title='Scholarly_method'>scholarly</a> <a href='https://en.wikipedia.org/wiki/Knowledge' title='Knowledge'>knowledge</a> as structured, interlinked, and <a href='https://en.wikipedia.org/wiki/Semantics' title='Semantics'>semantically</a> rich Scientific <a href='https://en.wikipedia.org/wiki/Knowledge' title='Knowledge'>Knowledge</a> <a href='https://en.wikipedia.org/wiki/Category:Graphs' title='Category:Graphs'>Graphs</a> (SKG). <a href='https://en.wikipedia.org/wiki/Knowledge' title='Knowledge'>Knowledge</a> <a href='https://en.wikipedia.org/wiki/Category:Graphs' title='Category:Graphs'>graphs</a> are large networks of <a href='https://en.wikipedia.org/wiki/Entity' title='Entity'>entities</a> and relationships, usually expressed in <a href='https://en.wikipedia.org/wiki/World_Wide_Web_Consortium' title='World_Wide_Web_Consortium'>W3C standards</a> such as OWL and RDF. 
SKGs focus on the <a href='https://en.wikipedia.org/wiki/Scholarly_method' title='Scholarly_method'>scholarly</a> domain and describe the actors (e.g., authors, organizations), the <a href='https://en.wikipedia.org/wiki/Document' title='Document'>documents</a> (e.g., publications, patents), and the <a href='https://en.wikipedia.org/wiki/Research' title='Research'>research</a> <a href='https://en.wikipedia.org/wiki/Knowledge' title='Knowledge'>knowledge</a> (e.g., <a href='https://en.wikipedia.org/wiki/Research' title='Research'>research</a> topics, tasks, technologies) in this <a href='https://en.wikipedia.org/wiki/Category:Space' title='Category:Space'>space</a> as well as their reciprocal relationships. These resources provide substantial benefits to <a href='https://en.wikipedia.org/wiki/Research' title='Research'>researchers</a>, <a href='https://en.wikipedia.org/wiki/Business' title='Business'>companies</a>, and policymakers by powering several data-driven services for navigating, analysing, and making sense of <a href='https://en.wikipedia.org/wiki/Research' title='Research'>research</a> dynamics. Some examples <a href='https://en.wikipedia.org/wiki/CONFIG.SYS' title='CONFIG.SYS'>include</a> <a href='https://en.wikipedia.org/wiki/Microsoft_Academic' title='Microsoft_Academic'>Microsoft Academic</a> Graph (MAG), Open <a href='https://en.wikipedia.org/wiki/Academy_(educational_institution)' title='Academy_(educational_institution)'>Academic</a> Graph (combining MAG and AMiner), ScholarlyData, PID Graph, <a href='https://en.wikipedia.org/wiki/Open_research' title='Open_research'>Open Research</a> <a href='https://en.wikipedia.org/wiki/Ontology_(information_science)' title='Ontology_(information_science)'>Knowledge Graph</a>, OpenCitations, and OpenAIRE <a href='https://en.wikipedia.org/wiki/Research' title='Research'>research</a> graph. 
Current challenges in this <a href='https://en.wikipedia.org/wiki/Category:Area' title='Category:Area'>area</a> <a href='https://en.wikipedia.org/wiki/CONFIG.SYS' title='CONFIG.SYS'>include</a>: i) the <a href='https://en.wikipedia.org/wiki/Category:Design' title='Category:Design'>design</a> of <a href='https://en.wikipedia.org/wiki/Ontology' title='Ontology'>ontologies</a> able to <a href='https://en.wikipedia.org/wiki/Concept' title='Concept'>conceptualise</a> <a href='https://en.wikipedia.org/wiki/Scholarly_method' title='Scholarly_method'>scholarly</a> <a href='https://en.wikipedia.org/wiki/Knowledge' title='Knowledge'>knowledge</a>, <a href='https://en.wikipedia.org/wiki/2' title='2'>ii</a>) (<a href='https://en.wikipedia.org/wiki/Numeral_prefix' title='Numeral_prefix'>semi-</a>)automatic extraction of <a href='https://en.wikipedia.org/wiki/Entity' title='Entity'>entities</a> and <a href='https://en.wikipedia.org/wiki/Concept' title='Concept'>concepts</a>, integration of <a href='https://en.wikipedia.org/wiki/Category:Information' title='Category:Information'>information</a> from <a href='https://en.wikipedia.org/wiki/Homogeneity_and_heterogeneity' title='Homogeneity_and_heterogeneity'>heterogeneous</a> sources, <a href='https://en.wikipedia.org/wiki/Category:Identification' title='Category:Identification'>identification</a> of duplicates, finding connections between <a href='https://en.wikipedia.org/wiki/Entity' title='Entity'>entities</a>, and iii) the development of new services using this data, that allow to explore this <a href='https://en.wikipedia.org/wiki/Category:Information' title='Category:Information'>information</a>, measure <a href='https://en.wikipedia.org/wiki/Research' title='Research'>research</a> impact and <a href='https://en.wikipedia.org/wiki/Acceleration' title='Acceleration'>accelerate</a> science. 
This workshop aims at bringing together <a href='https://en.wikipedia.org/wiki/Research' title='Research'>researchers</a> and practitioners from different fields (including, but not limited to, <a href='https://en.wikipedia.org/wiki/Digital_library' title='Digital_library'>Digital Libraries</a>, <a href='https://en.wikipedia.org/wiki/Information_extraction' title='Information_extraction'>Information Extraction</a>, <a href='https://en.wikipedia.org/wiki/Machine_learning' title='Machine_learning'>Machine Learning</a>, <a href='https://en.wikipedia.org/wiki/Semantic_Web' title='Semantic_Web'>Semantic Web</a>, <a href='https://en.wikipedia.org/wiki/Knowledge_engineering' title='Knowledge_engineering'>Knowledge Engineering</a>, <a href='https://en.wikipedia.org/wiki/Natural_language_processing' title='Natural_language_processing'>Natural Language Processing</a>, <a href='https://en.wikipedia.org/wiki/Scholarly_communication' title='Scholarly_communication'>Scholarly Communication</a>, and <a href='https://en.wikipedia.org/wiki/Category:Bibliometrics' title='Category:Bibliometrics'>Bibliometrics</a>) in order to explore <a href='https://en.wikipedia.org/wiki/Innovation' title='Innovation'>innovative</a> <a href='https://en.wikipedia.org/wiki/Solution' title='Solution'>solutions</a> and <a href='https://en.wikipedia.org/wiki/Idea' title='Idea'>ideas</a> for the production and <a href='https://en.wikipedia.org/wiki/Category:Consumption' title='Category:Consumption'>consumption</a> of Scientific <a href='https://en.wikipedia.org/wiki/Knowledge' title='Knowledge'>Knowledge</a> <a href='https://en.wikipedia.org/wiki/Category:Graphs' title='Category:Graphs'>Graphs</a>\n",
       "    </div>\n",
       "    <div>\n",
       "        <h3>Predicted categories:</h3>\n",
       "        <a href='https://en.wikipedia.org/wiki/Category:Knowledge' title='Knowledge'>Knowledge</a> (15) | <a href='https://en.wikipedia.org/wiki/Category:Research' title='Research'>Research</a> (8) | <a href='https://en.wikipedia.org/wiki/Category:Research_methods' title='Research_methods'>Research_methods</a> (7) | <a href='https://en.wikipedia.org/wiki/Category:Academia' title='Academia'>Academia</a> (6) | <a href='https://en.wikipedia.org/wiki/Category:Methodology' title='Methodology'>Methodology</a> (4) | <a href='https://en.wikipedia.org/wiki/Category:Ontology' title='Ontology'>Ontology</a> (4) | <a href='https://en.wikipedia.org/wiki/Category:Graphs' title='Graphs'>Graphs</a> (3) | <a href='https://en.wikipedia.org/wiki/Category:Data_modeling_diagrams' title='Data_modeling_diagrams'>Data_modeling_diagrams</a> (3) | <a href='https://en.wikipedia.org/wiki/Category:Knowledge_engineering' title='Knowledge_engineering'>Knowledge_engineering</a> (3) | <a href='https://en.wikipedia.org/wiki/Category:Semantic_Web' title='Semantic_Web'>Semantic_Web</a> (3) | <a href='https://en.wikipedia.org/wiki/Category:Concepts' title='Concepts'>Concepts</a> (3) | <a href='https://en.wikipedia.org/wiki/Category:Meaning_(philosophy_of_language)' title='Meaning_(philosophy_of_language)'>Meaning_(philosophy_of_language)</a> (2) | <a href='https://en.wikipedia.org/wiki/Category:Web_services' title='Web_services'>Web_services</a> (2) | <a href='https://en.wikipedia.org/wiki/Category:Information_science' title='Information_science'>Information_science</a> (2) | <a href='https://en.wikipedia.org/wiki/Category:Configuration_files' title='Configuration_files'>Configuration_files</a> (2) | <a href='https://en.wikipedia.org/wiki/Category:Ontology_(information_science)' title='Ontology_(information_science)'>Ontology_(information_science)</a> (2) | <a href='https://en.wikipedia.org/wiki/Category:Design' title='Design'>Design</a> (2) | <a 
href='https://en.wikipedia.org/wiki/Category:Abstraction' title='Abstraction'>Abstraction</a> (2) | <a href='https://en.wikipedia.org/wiki/Category:Mental_content' title='Mental_content'>Mental_content</a> (2) | <a href='https://en.wikipedia.org/wiki/Category:Information' title='Information'>Information</a> (2) | <a href='https://en.wikipedia.org/wiki/Category:Library_science' title='Library_science'>Library_science</a> (2) | <a href='https://en.wikipedia.org/wiki/Category:Artificial_intelligence' title='Artificial_intelligence'>Artificial_intelligence</a> (2) | <a href='https://en.wikipedia.org/wiki/Category:Natural_language_processing' title='Natural_language_processing'>Natural_language_processing</a> (2) | <a href='https://en.wikipedia.org/wiki/Category:Perception' title='Perception'>Perception</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Statistical_mechanics' title='Statistical_mechanics'>Statistical_mechanics</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Web_development' title='Web_development'>Web_development</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Space' title='Space'>Space</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Entrepreneurship' title='Entrepreneurship'>Entrepreneurship</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Database_stubs' title='Database_stubs'>Database_stubs</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Online_databases' title='Online_databases'>Online_databases</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Open_content' title='Open_content'>Open_content</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Open_science' title='Open_science'>Open_science</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Collaboration' title='Collaboration'>Collaboration</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Knowledge_representation' title='Knowledge_representation'>Knowledge_representation</a> (1) | <a 
href='https://en.wikipedia.org/wiki/Category:Knowledge_bases' title='Knowledge_bases'>Knowledge_bases</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Area' title='Area'>Area</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Integers' title='Integers'>Integers</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Numeral_systems' title='Numeral_systems'>Numeral_systems</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Chemical_reactions' title='Chemical_reactions'>Chemical_reactions</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Identification' title='Identification'>Identification</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Acceleration' title='Acceleration'>Acceleration</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Machine_learning' title='Machine_learning'>Machine_learning</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Cybernetics' title='Cybernetics'>Cybernetics</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Learning' title='Learning'>Learning</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Internet_ages' title='Internet_ages'>Internet_ages</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Emerging_technologies' title='Emerging_technologies'>Emerging_technologies</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Computational_fields_of_study' title='Computational_fields_of_study'>Computational_fields_of_study</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Speech_recognition' title='Speech_recognition'>Speech_recognition</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Computational_linguistics' title='Computational_linguistics'>Computational_linguistics</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Bibliometrics' title='Bibliometrics'>Bibliometrics</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Innovation' title='Innovation'>Innovation</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Dosage_forms' 
title='Dosage_forms'>Dosage_forms</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Alchemical_processes' title='Alchemical_processes'>Alchemical_processes</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Solutions' title='Solutions'>Solutions</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Creativity' title='Creativity'>Creativity</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Consumption' title='Consumption'>Consumption</a> (1)\n",
       "    </div>\n",
       "    </div>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Render the tagged document and its category tally for the current `text`.\n",
    "display(get_html(text, processor))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "text = \"\"\"Methods for extracting entities (methods, research topics, technologies, tasks, materials, metrics, research contributions) and relationships from research publications\n",
    "Methods for extracting metadata about authors, documents, datasets, grants, affiliations and others.\n",
    "Data models (e.g., ontologies, vocabularies, schemas) for the description of scholarly data and the linking between scholarly data/software and academic papers that report or cite them\n",
    "Description of citations for scholarly articles, data and software and their interrelationships\n",
    "Applications for the (semi-)automatic annotation of scholarly papers\n",
    "Theoretical models describing the rhetorical and argumentative structure of scholarly papers and their application in practice\n",
    "Methods for quality assessment of scientific knowledge graphs\n",
    "Description and use of provenance information of scholarly data\n",
    "Methods for the exploration, retrieval and visualization of scientific knowledge graphs\n",
    "Pattern discovery of scholarly data\n",
    "Scientific claims identification from textual contents\n",
    "Automatic or semi-automatic approaches to making sense of research dynamics\n",
    "Content- and data-based analysis on scholarly papers\n",
    "Automatic semantic enhancement of existing scholarly libraries and papers\n",
    "Reconstruction, forecasting and monitoring of scholarly data\n",
    "Novel user interfaces for interaction with paper, metadata, content, software and data\n",
    "Visualisation of related papers or data according to multiple dimensions (semantic similarity of abstracts, keywords, etc.)\n",
    "Applications for making sense of scholarly data\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "    <div>\n",
       "        <h3>Tagged document:</h3>\n",
       "        Methods for extracting <a href='https://en.wikipedia.org/wiki/Entity' title='Entity'>entities</a> (methods, <a href='https://en.wikipedia.org/wiki/Research' title='Research'>research</a> topics, technologies, tasks, materials, <a href='https://en.wikipedia.org/wiki/Category:Metrics' title='Category:Metrics'>metrics</a>, <a href='https://en.wikipedia.org/wiki/Research' title='Research'>research</a> contributions) and relationships from <a href='https://en.wikipedia.org/wiki/Research' title='Research'>research</a> publications<br/>Methods for extracting <a href='https://en.wikipedia.org/wiki/Category:Metadata' title='Category:Metadata'>metadata</a> about authors, <a href='https://en.wikipedia.org/wiki/Document' title='Document'>documents</a>, datasets, grants, affiliations and others.<br/><a href='https://en.wikipedia.org/wiki/Data_model' title='Data_model'>Data models</a> (e.g., <a href='https://en.wikipedia.org/wiki/Ontology' title='Ontology'>ontologies</a>, <a href='https://en.wikipedia.org/wiki/Vocabulary' title='Vocabulary'>vocabularies</a>, schemas) for the description of <a href='https://en.wikipedia.org/wiki/Scholarly_method' title='Scholarly_method'>scholarly</a> data and the linking between <a href='https://en.wikipedia.org/wiki/Scholarly_method' title='Scholarly_method'>scholarly</a> data/<a href='https://en.wikipedia.org/wiki/Software' title='Software'>software</a> and <a href='https://en.wikipedia.org/wiki/Academy_(educational_institution)' title='Academy_(educational_institution)'>academic</a> papers that report or cite them<br/>Description of citations for <a href='https://en.wikipedia.org/wiki/Scholarly_method' title='Scholarly_method'>scholarly</a> articles, data and <a href='https://en.wikipedia.org/wiki/Software' title='Software'>software</a> and their interrelationships<br/>Applications for the (<a href='https://en.wikipedia.org/wiki/Numeral_prefix' title='Numeral_prefix'>semi-</a>)automatic annotation of <a 
href='https://en.wikipedia.org/wiki/Scholarly_method' title='Scholarly_method'>scholarly</a> papers<br/><a href='https://en.wikipedia.org/wiki/Theory' title='Theory'>Theoretical</a> models describing the <a href='https://en.wikipedia.org/wiki/Rhetoric' title='Rhetoric'>rhetorical</a> and argumentative structure of <a href='https://en.wikipedia.org/wiki/Scholarly_method' title='Scholarly_method'>scholarly</a> papers and their application in practice<br/>Methods for <a href='https://en.wikipedia.org/wiki/Quality_assurance' title='Quality_assurance'>quality assessment</a> of scientific <a href='https://en.wikipedia.org/wiki/Knowledge' title='Knowledge'>knowledge</a> <a href='https://en.wikipedia.org/wiki/Category:Graphs' title='Category:Graphs'>graphs</a><br/>Description and use of provenance <a href='https://en.wikipedia.org/wiki/Category:Information' title='Category:Information'>information</a> of <a href='https://en.wikipedia.org/wiki/Scholarly_method' title='Scholarly_method'>scholarly</a> data<br/>Methods for the exploration, retrieval and visualization of scientific <a href='https://en.wikipedia.org/wiki/Knowledge' title='Knowledge'>knowledge</a> <a href='https://en.wikipedia.org/wiki/Category:Graphs' title='Category:Graphs'>graphs</a><br/>Pattern discovery of <a href='https://en.wikipedia.org/wiki/Scholarly_method' title='Scholarly_method'>scholarly</a> data<br/>Scientific claims <a href='https://en.wikipedia.org/wiki/Category:Identification' title='Category:Identification'>identification</a> from <a href='https://en.wikipedia.org/wiki/Textuality' title='Textuality'>textual</a> contents<br/>Automatic or semi-automatic approaches to making sense of <a href='https://en.wikipedia.org/wiki/Research' title='Research'>research</a> dynamics<br/>Content- and data-based <a href='https://en.wikipedia.org/wiki/Category:Analysis' title='Category:Analysis'>analysis</a> on <a href='https://en.wikipedia.org/wiki/Scholarly_method' title='Scholarly_method'>scholarly</a> 
papers<br/>Automatic <a href='https://en.wikipedia.org/wiki/Semantics' title='Semantics'>semantic</a> enhancement of <a href='https://en.wikipedia.org/wiki/Existence' title='Existence'>existing</a> <a href='https://en.wikipedia.org/wiki/Scholarly_method' title='Scholarly_method'>scholarly</a> <a href='https://en.wikipedia.org/wiki/Library' title='Library'>libraries</a> and papers<br/>Reconstruction, <a href='https://en.wikipedia.org/wiki/Category:Forecasting' title='Category:Forecasting'>forecasting</a> and monitoring of <a href='https://en.wikipedia.org/wiki/Scholarly_method' title='Scholarly_method'>scholarly</a> data<br/>Novel <a href='https://en.wikipedia.org/wiki/User_interface' title='User_interface'>user interfaces</a> for interaction with <a href='https://en.wikipedia.org/wiki/Paper' title='Paper'>paper</a>, <a href='https://en.wikipedia.org/wiki/Category:Metadata' title='Category:Metadata'>metadata</a>, content, <a href='https://en.wikipedia.org/wiki/Software' title='Software'>software</a> and data<br/>Visualisation of related papers or data according to multiple <a href='https://en.wikipedia.org/wiki/Dimension' title='Dimension'>dimensions</a> (<a href='https://en.wikipedia.org/wiki/Semantics' title='Semantics'>semantic</a> similarity of abstracts, keywords, etc.<a href='https://en.wikipedia.org/wiki/Bracket' title='Bracket'>)</a><br/>Applications for making sense of <a href='https://en.wikipedia.org/wiki/Scholarly_method' title='Scholarly_method'>scholarly</a>\n",
       "    </div>\n",
       "    <div>\n",
       "        <h3>Predicted categories:</h3>\n",
       "        <a href='https://en.wikipedia.org/wiki/Category:Academia' title='Academia'>Academia</a> (12) | <a href='https://en.wikipedia.org/wiki/Category:Methodology' title='Methodology'>Methodology</a> (11) | <a href='https://en.wikipedia.org/wiki/Category:Knowledge' title='Knowledge'>Knowledge</a> (6) | <a href='https://en.wikipedia.org/wiki/Category:Research_methods' title='Research_methods'>Research_methods</a> (4) | <a href='https://en.wikipedia.org/wiki/Category:Research' title='Research'>Research</a> (4) | <a href='https://en.wikipedia.org/wiki/Category:Meaning_(philosophy_of_language)' title='Meaning_(philosophy_of_language)'>Meaning_(philosophy_of_language)</a> (3) | <a href='https://en.wikipedia.org/wiki/Category:Computer_science' title='Computer_science'>Computer_science</a> (3) | <a href='https://en.wikipedia.org/wiki/Category:Software' title='Software'>Software</a> (3) | <a href='https://en.wikipedia.org/wiki/Category:Metadata' title='Metadata'>Metadata</a> (2) | <a href='https://en.wikipedia.org/wiki/Category:Ontology' title='Ontology'>Ontology</a> (2) | <a href='https://en.wikipedia.org/wiki/Category:Graphs' title='Graphs'>Graphs</a> (2) | <a href='https://en.wikipedia.org/wiki/Category:Data_modeling_diagrams' title='Data_modeling_diagrams'>Data_modeling_diagrams</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Metrics' title='Metrics'>Metrics</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Information_science' title='Information_science'>Information_science</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Data_modeling' title='Data_modeling'>Data_modeling</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Lexicography' title='Lexicography'>Lexicography</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Vocabulary' title='Vocabulary'>Vocabulary</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Numeral_systems' title='Numeral_systems'>Numeral_systems</a> (1) | <a 
href='https://en.wikipedia.org/wiki/Category:Inductive_reasoning' title='Inductive_reasoning'>Inductive_reasoning</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Abstraction' title='Abstraction'>Abstraction</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Theories' title='Theories'>Theories</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Critical_thinking_skills' title='Critical_thinking_skills'>Critical_thinking_skills</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Quality_assurance' title='Quality_assurance'>Quality_assurance</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Information' title='Information'>Information</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Identification' title='Identification'>Identification</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Structuralism' title='Structuralism'>Structuralism</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Analysis' title='Analysis'>Analysis</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Library_science' title='Library_science'>Library_science</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Forecasting' title='Forecasting'>Forecasting</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Human-machine_interaction' title='Human-machine_interaction'>Human-machine_interaction</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Virtual_reality' title='Virtual_reality'>Virtual_reality</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:User_interfaces' title='User_interfaces'>User_interfaces</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:User_interface_techniques' title='User_interface_techniques'>User_interface_techniques</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Papermaking' title='Papermaking'>Papermaking</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Packaging_materials' title='Packaging_materials'>Packaging_materials</a> (1) | <a 
href='https://en.wikipedia.org/wiki/Category:Printing_materials' title='Printing_materials'>Printing_materials</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Mathematical_concepts' title='Mathematical_concepts'>Mathematical_concepts</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Abstract_algebra' title='Abstract_algebra'>Abstract_algebra</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Geometric_measurement' title='Geometric_measurement'>Geometric_measurement</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Dimension' title='Dimension'>Dimension</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Mathematical_notation' title='Mathematical_notation'>Mathematical_notation</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Punctuation' title='Punctuation'>Punctuation</a> (1)\n",
       "    </div>\n",
       "    </div>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "display(get_html(text, processor))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "text=\"\"\"One of the most common AI techniques used for processing big data is machine learning, a self-adaptive algorithm that gets increasingly better analysis and patterns with experience or with newly added data.\n",
    "\n",
    "If a digital payments company wanted to detect the occurrence or potential for fraud in its system, it could employ machine learning tools for this purpose. The computational algorithm built into a computer model will process all transactions happening on the digital platform, find patterns in the data set, and point out any anomaly detected by the pattern.\n",
    "\n",
    "Deep learning, a subset of machine learning, utilizes a hierarchical level of artificial neural networks to carry out the process of machine learning. The artificial neural networks are built like the human brain, with neuron nodes connected together like a web. While traditional programs build analysis with data in a linear way, the hierarchical function of deep learning systems enables machines to process data with a nonlinear approach.\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "    <div>\n",
       "        <h3>Tagged document:</h3>\n",
       "        <a href='https://en.wikipedia.org/wiki/1' title='1'>One</a> of the most common <a href='https://en.wikipedia.org/wiki/Artificial_intelligence' title='Artificial_intelligence'>AI</a> <a href='https://en.wikipedia.org/wiki/Category:Techniques' title='Category:Techniques'>techniques</a> used for processing <a href='https://en.wikipedia.org/wiki/Big_data' title='Big_data'>big data</a> is <a href='https://en.wikipedia.org/wiki/Machine_learning' title='Machine_learning'>machine learning</a>, a self-<a href='https://en.wikipedia.org/wiki/Adaptive_algorithm' title='Adaptive_algorithm'>adaptive algorithm</a> that gets increasingly better <a href='https://en.wikipedia.org/wiki/Category:Analysis' title='Category:Analysis'>analysis</a> and <a href='https://en.wikipedia.org/wiki/Pattern' title='Pattern'>patterns</a> with experience or with newly added data.<br/><br/>If a digital payments company wanted to <a href='https://en.wikipedia.org/wiki/Sensor' title='Sensor'>detect</a> the <a href='https://en.wikipedia.org/wiki/Type-token_distinction' title='Type-token_distinction'>occurrence</a> or potential for fraud in its system, <a href='https://en.wikipedia.org/wiki/Information_technology' title='Information_technology'>it</a> could <a href='https://en.wikipedia.org/wiki/Employment' title='Employment'>employ</a> <a href='https://en.wikipedia.org/wiki/Machine_learning' title='Machine_learning'>machine learning</a> tools for this purpose. 
The <a href='https://en.wikipedia.org/wiki/Computation' title='Computation'>computational</a> algorithm built into a <a href='https://en.wikipedia.org/wiki/Computer_simulation' title='Computer_simulation'>computer model</a> will process all transactions happening on the digital platform, find <a href='https://en.wikipedia.org/wiki/Pattern' title='Pattern'>patterns</a> in the data <a href='https://en.wikipedia.org/wiki/Set_(mathematics)' title='Set_(mathematics)'>set</a>, and point out any anomaly detected by the pattern.<br/><br/><a href='https://en.wikipedia.org/wiki/Deep_learning' title='Deep_learning'>Deep learning</a>, a <a href='https://en.wikipedia.org/wiki/Subset' title='Subset'>subset</a> of <a href='https://en.wikipedia.org/wiki/Machine_learning' title='Machine_learning'>machine learning</a>, utilizes a <a href='https://en.wikipedia.org/wiki/Hierarchy' title='Hierarchy'>hierarchical</a> level of <a href='https://en.wikipedia.org/wiki/Artificial_neural_network' title='Artificial_neural_network'>artificial neural networks</a> to carry out the process of <a href='https://en.wikipedia.org/wiki/Machine_learning' title='Machine_learning'>machine learning</a>. The <a href='https://en.wikipedia.org/wiki/Artificial_neural_network' title='Artificial_neural_network'>artificial neural networks</a> are built like <a href='https://en.wikipedia.org/wiki/Human_brain' title='Human_brain'>the human brain</a>, with <a href='https://en.wikipedia.org/wiki/Neuron_(software)' title='Neuron_(software)'>neuron</a> nodes <a href='https://en.wikipedia.org/wiki/National_Broadband_Plan_(United_States)' title='National_Broadband_Plan_(United_States)'>connected</a> together like a web. 
While traditional programs build <a href='https://en.wikipedia.org/wiki/Category:Analysis' title='Category:Analysis'>analysis</a> with data in a <a href='https://en.wikipedia.org/wiki/Linearity' title='Linearity'>linear</a> way, the <a href='https://en.wikipedia.org/wiki/Hierarchy' title='Hierarchy'>hierarchical</a> function of <a href='https://en.wikipedia.org/wiki/Deep_learning' title='Deep_learning'>deep learning</a> systems enables machines to process data with a <a href='https://en.wikipedia.org/wiki/Nonlinear_system' title='Nonlinear_system'>nonlinear</a>\n",
       "    </div>\n",
       "    <div>\n",
       "        <h3>Predicted categories:</h3>\n",
       "        <a href='https://en.wikipedia.org/wiki/Category:Cybernetics' title='Cybernetics'>Cybernetics</a> (5) | <a href='https://en.wikipedia.org/wiki/Category:Computational_neuroscience' title='Computational_neuroscience'>Computational_neuroscience</a> (4) | <a href='https://en.wikipedia.org/wiki/Category:Machine_learning' title='Machine_learning'>Machine_learning</a> (4) | <a href='https://en.wikipedia.org/wiki/Category:Learning' title='Learning'>Learning</a> (4) | <a href='https://en.wikipedia.org/wiki/Category:Patterns' title='Patterns'>Patterns</a> (4) | <a href='https://en.wikipedia.org/wiki/Category:Computational_fields_of_study' title='Computational_fields_of_study'>Computational_fields_of_study</a> (2) | <a href='https://en.wikipedia.org/wiki/Category:Analysis' title='Analysis'>Analysis</a> (2) | <a href='https://en.wikipedia.org/wiki/Category:Design' title='Design'>Design</a> (2) | <a href='https://en.wikipedia.org/wiki/Category:Deep_learning' title='Deep_learning'>Deep_learning</a> (2) | <a href='https://en.wikipedia.org/wiki/Category:Hierarchy' title='Hierarchy'>Hierarchy</a> (2) | <a href='https://en.wikipedia.org/wiki/Category:Computational_statistics' title='Computational_statistics'>Computational_statistics</a> (2) | <a href='https://en.wikipedia.org/wiki/Category:Artificial_neural_networks' title='Artificial_neural_networks'>Artificial_neural_networks</a> (2) | <a href='https://en.wikipedia.org/wiki/Category:Classification_algorithms' title='Classification_algorithms'>Classification_algorithms</a> (2) | <a href='https://en.wikipedia.org/wiki/Category:Mathematical_psychology' title='Mathematical_psychology'>Mathematical_psychology</a> (2) | <a href='https://en.wikipedia.org/wiki/Category:Integers' title='Integers'>Integers</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Unsolved_problems_in_computer_science' title='Unsolved_problems_in_computer_science'>Unsolved_problems_in_computer_science</a> (1) | <a 
href='https://en.wikipedia.org/wiki/Category:Artificial_intelligence' title='Artificial_intelligence'>Artificial_intelligence</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Emerging_technologies' title='Emerging_technologies'>Emerging_technologies</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Techniques' title='Techniques'>Techniques</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Data_management' title='Data_management'>Data_management</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Transaction_processing' title='Transaction_processing'>Transaction_processing</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Distributed_computing_problems' title='Distributed_computing_problems'>Distributed_computing_problems</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Big_data' title='Big_data'>Big_data</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Software_engineering_stubs' title='Software_engineering_stubs'>Software_engineering_stubs</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Algorithms' title='Algorithms'>Algorithms</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Transducers' title='Transducers'>Transducers</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Sensors' title='Sensors'>Sensors</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Measuring_instruments' title='Measuring_instruments'>Measuring_instruments</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Knowledge_representation' title='Knowledge_representation'>Knowledge_representation</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Metalogic' title='Metalogic'>Metalogic</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Abstraction' title='Abstraction'>Abstraction</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Information-theoretically_secure_algorithms' title='Information-theoretically_secure_algorithms'>Information-theoretically_secure_algorithms</a> (1) | <a 
href='https://en.wikipedia.org/wiki/Category:Employment' title='Employment'>Employment</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Theoretical_computer_science' title='Theoretical_computer_science'>Theoretical_computer_science</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Computability_theory' title='Computability_theory'>Computability_theory</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Virtual_reality' title='Virtual_reality'>Virtual_reality</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Computational_science' title='Computational_science'>Computational_science</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Simulation_software' title='Simulation_software'>Simulation_software</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Scientific_modeling' title='Scientific_modeling'>Scientific_modeling</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Mathematical_objects' title='Mathematical_objects'>Mathematical_objects</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Set_theory' title='Set_theory'>Set_theory</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Basic_concepts_in_set_theory' title='Basic_concepts_in_set_theory'>Basic_concepts_in_set_theory</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Brain' title='Brain'>Brain</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Broadband' title='Broadband'>Broadband</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Wireless_networking' title='Wireless_networking'>Wireless_networking</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Elementary_algebra' title='Elementary_algebra'>Elementary_algebra</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Dynamical_systems' title='Dynamical_systems'>Dynamical_systems</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Nonlinear_systems' title='Nonlinear_systems'>Nonlinear_systems</a> (1)\n",
       "    </div>\n",
       "    </div>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "display(get_html(text, processor))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [],
   "source": [
    "text=\"\"\"Commonsense knowledge graph reasoning(CKGR) is the task of predicting a missing entity given one existing and the relation in a commonsense knowledge graph (CKG). Existing methods can be classified into two categories generation method and selection method. Each method has its own advantage. We theoretically and empirically compare the two methods, finding the selection method is more suitable than the generation method in CKGR. Given the observation, we further combine the structure of neural Text Encoder and Knowledge Graph Embedding models to solve the selection method's two problems, achieving competitive results. We provide a basic framework and baseline model for subsequent CKGR tasks by selection methods.\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "    <div>\n",
       "        <h3>Tagged document:</h3>\n",
       "        <a href='https://en.wikipedia.org/wiki/Commonsense_knowledge_(artificial_intelligence)' title='Commonsense_knowledge_(artificial_intelligence)'>Commonsense knowledge</a> graph <a href='https://en.wikipedia.org/wiki/Reason' title='Reason'>reasoning</a>(CKGR) is the <a href='https://en.wikipedia.org/wiki/Tandem_pore_domain_potassium_channel' title='Tandem_pore_domain_potassium_channel'>task</a> of <a href='https://en.wikipedia.org/wiki/Prediction' title='Prediction'>predicting</a> a missing entity given <a href='https://en.wikipedia.org/wiki/1' title='1'>one</a> <a href='https://en.wikipedia.org/wiki/Existence' title='Existence'>existing</a> and the relation in a <a href='https://en.wikipedia.org/wiki/Commonsense_knowledge_(artificial_intelligence)' title='Commonsense_knowledge_(artificial_intelligence)'>commonsense knowledge</a> graph (CKG). <a href='https://en.wikipedia.org/wiki/Existence' title='Existence'>Existing</a> methods can be classified into <a href='https://en.wikipedia.org/wiki/2' title='2'>two</a> categories generation method and selection method. Each method has its own advantage. We theoretically and empirically compare the <a href='https://en.wikipedia.org/wiki/2' title='2'>two</a> methods, finding the selection method is more suitable than the generation method in CKGR. Given the observation, we further <a href='https://en.wikipedia.org/wiki/COMBINE' title='COMBINE'>combine</a> the structure of <a href='https://en.wikipedia.org/wiki/Nervous_system' title='Nervous_system'>neural</a> Text Encoder and <a href='https://en.wikipedia.org/wiki/Ontology_(information_science)' title='Ontology_(information_science)'>Knowledge Graph</a> Embedding models to solve the selection method's <a href='https://en.wikipedia.org/wiki/Regular_expression' title='Regular_expression'>two problems</a>\n",
       "    </div>\n",
       "    <div>\n",
       "        <h3>Predicted categories:</h3>\n",
       "        <a href='https://en.wikipedia.org/wiki/Category:Knowledge_bases' title='Knowledge_bases'>Knowledge_bases</a> (3) | <a href='https://en.wikipedia.org/wiki/Category:Integers' title='Integers'>Integers</a> (3) | <a href='https://en.wikipedia.org/wiki/Category:Ontology' title='Ontology'>Ontology</a> (2) | <a href='https://en.wikipedia.org/wiki/Category:Reasoning' title='Reasoning'>Reasoning</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Belief' title='Belief'>Belief</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Epistemology' title='Epistemology'>Epistemology</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Ion_channels' title='Ion_channels'>Ion_channels</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Futurology' title='Futurology'>Futurology</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Biological_databases' title='Biological_databases'>Biological_databases</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Nervous_system' title='Nervous_system'>Nervous_system</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Information_science' title='Information_science'>Information_science</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Knowledge_engineering' title='Knowledge_engineering'>Knowledge_engineering</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Knowledge_representation' title='Knowledge_representation'>Knowledge_representation</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Ontology_(information_science)' title='Ontology_(information_science)'>Ontology_(information_science)</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Semantic_Web' title='Semantic_Web'>Semantic_Web</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Automata_(computation)' title='Automata_(computation)'>Automata_(computation)</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Pattern_matching' title='Pattern_matching'>Pattern_matching</a> (1) | <a 
href='https://en.wikipedia.org/wiki/Category:Formal_languages' title='Formal_languages'>Formal_languages</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Programming_constructs' title='Programming_constructs'>Programming_constructs</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Regular_expressions' title='Regular_expressions'>Regular_expressions</a> (1)\n",
       "    </div>\n",
       "    </div>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "display(get_html(text, processor))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "text=\"\"\"We introduce several measures of novelty for a scientific article in MEDLINE based on the temporal profiles of its assigned Medical Subject Headings (MeSH). First, temporal profiles for all MeSH terms (and pairs of MeSH terms) were characterized empirically and modelled as logistic growth curves. Second, a paper's novelty is captured by its youngest MeSH (and pairs of MeSH) as measured in years and volume of prior work. Across all papers in MEDLINE published since 1985, we find that individual concept novelty is rare (2.7% of papers have a MeSH ≤ 3 years old; 1.0% have a MeSH ≤ 20 papers old), while combinatorial novelty is the norm (68% have a pair of MeSH ≤ 3 years old; 90% have a pair of MeSH ≤ 10 papers old). Furthermore, these novelty measures exhibit complex correlations with article impact (as measured by citations received) and authors' professional age.\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "    <div>\n",
       "        <h3>Tagged document:</h3>\n",
       "        We introduce several measures of <a href='https://en.wikipedia.org/wiki/Novelty' title='Novelty'>novelty</a> for a <a href='https://en.wikipedia.org/wiki/Scientific_literature' title='Scientific_literature'>scientific article</a> in <a href='https://en.wikipedia.org/wiki/MEDLINE' title='MEDLINE'>MEDLINE</a> based on the temporal profiles of its assigned <a href='https://en.wikipedia.org/wiki/Medical_Subject_Headings' title='Medical_Subject_Headings'>Medical Subject Headings (MeSH)</a>. <a href='https://en.wikipedia.org/wiki/For_Inspiration_and_Recognition_of_Science_and_Technology' title='For_Inspiration_and_Recognition_of_Science_and_Technology'>First</a>, temporal profiles <a href='https://en.wikipedia.org/wiki/Universal_quantification' title='Universal_quantification'>for all</a> <a href='https://en.wikipedia.org/wiki/Medical_Subject_Headings' title='Medical_Subject_Headings'>MeSH</a> terms (and <a href='https://en.wikipedia.org/wiki/PAIRS_Foundation' title='PAIRS_Foundation'>pairs</a> of <a href='https://en.wikipedia.org/wiki/Medical_Subject_Headings' title='Medical_Subject_Headings'>MeSH</a> terms) were characterized empirically and modelled as <a href='https://en.wikipedia.org/wiki/Logistic_function' title='Logistic_function'>logistic growth</a> <a href='https://en.wikipedia.org/wiki/Category:Curves' title='Category:Curves'>curves</a>. 
Second, a <a href='https://en.wikipedia.org/wiki/Paper' title='Paper'>paper</a>'s <a href='https://en.wikipedia.org/wiki/Novelty' title='Novelty'>novelty</a> is captured by its youngest <a href='https://en.wikipedia.org/wiki/Medical_Subject_Headings' title='Medical_Subject_Headings'>MeSH</a> (and <a href='https://en.wikipedia.org/wiki/PAIRS_Foundation' title='PAIRS_Foundation'>pairs</a> of <a href='https://en.wikipedia.org/wiki/Medical_Subject_Headings' title='Medical_Subject_Headings'>MeSH</a>) as <a href='https://en.wikipedia.org/wiki/Measurement' title='Measurement'>measured</a> in years and <a href='https://en.wikipedia.org/wiki/Category:Volume' title='Category:Volume'>volume</a> of prior work. Across all papers in <a href='https://en.wikipedia.org/wiki/MEDLINE' title='MEDLINE'>MEDLINE</a> published since 1985, we find that individual concept <a href='https://en.wikipedia.org/wiki/Novelty' title='Novelty'>novelty</a> is rare (<a href='https://en.wikipedia.org/wiki/E_(mathematical_constant)' title='E_(mathematical_constant)'>2.7</a>% of papers have a <a href='https://en.wikipedia.org/wiki/Medical_Subject_Headings' title='Medical_Subject_Headings'>MeSH</a> ≤ 3 years old; <a href='https://en.wikipedia.org/wiki/1' title='1'>1.0</a>% have a <a href='https://en.wikipedia.org/wiki/Medical_Subject_Headings' title='Medical_Subject_Headings'>MeSH</a> ≤ 20 papers old), while <a href='https://en.wikipedia.org/wiki/Combinatorics' title='Combinatorics'>combinatorial</a> <a href='https://en.wikipedia.org/wiki/Novelty' title='Novelty'>novelty</a> is the <a href='https://en.wikipedia.org/wiki/Naturally_occurring_radioactive_material' title='Naturally_occurring_radioactive_material'>norm</a> (<a href='https://en.wikipedia.org/wiki/Normal_distribution' title='Normal_distribution'>68%</a> have a pair of <a href='https://en.wikipedia.org/wiki/Medical_Subject_Headings' title='Medical_Subject_Headings'>MeSH</a> ≤ 3 years old; 90% have a pair of <a 
href='https://en.wikipedia.org/wiki/Medical_Subject_Headings' title='Medical_Subject_Headings'>MeSH</a> ≤ 10 papers old). Furthermore, these <a href='https://en.wikipedia.org/wiki/Novelty' title='Novelty'>novelty</a> measures exhibit <a href='https://en.wikipedia.org/wiki/UCL_Faculty_of_Mathematical_and_Physical_Sciences' title='UCL_Faculty_of_Mathematical_and_Physical_Sciences'>complex</a> <a href='https://en.wikipedia.org/wiki/Correlation_and_dependence' title='Correlation_and_dependence'>correlations</a> with article impact (as <a href='https://en.wikipedia.org/wiki/Measurement' title='Measurement'>measured</a>\n",
       "    </div>\n",
       "    <div>\n",
       "        <h3>Predicted categories:</h3>\n",
       "        <a href='https://en.wikipedia.org/wiki/Category:Biological_databases' title='Biological_databases'>Biological_databases</a> (11) | <a href='https://en.wikipedia.org/wiki/Category:Thesauri' title='Thesauri'>Thesauri</a> (9) | <a href='https://en.wikipedia.org/wiki/Category:Library_cataloging_and_classification' title='Library_cataloging_and_classification'>Library_cataloging_and_classification</a> (9) | <a href='https://en.wikipedia.org/wiki/Category:Medical_classification' title='Medical_classification'>Medical_classification</a> (9) | <a href='https://en.wikipedia.org/wiki/Category:Innovation' title='Innovation'>Innovation</a> (5) | <a href='https://en.wikipedia.org/wiki/Category:Bibliographic_databases_and_indexes' title='Bibliographic_databases_and_indexes'>Bibliographic_databases_and_indexes</a> (2) | <a href='https://en.wikipedia.org/wiki/Category:Medical_databases' title='Medical_databases'>Medical_databases</a> (2) | <a href='https://en.wikipedia.org/wiki/Category:Online_databases' title='Online_databases'>Online_databases</a> (2) | <a href='https://en.wikipedia.org/wiki/Category:Interpersonal_relationships' title='Interpersonal_relationships'>Interpersonal_relationships</a> (2) | <a href='https://en.wikipedia.org/wiki/Category:Interpersonal_communication' title='Interpersonal_communication'>Interpersonal_communication</a> (2) | <a href='https://en.wikipedia.org/wiki/Category:Relationship_counseling' title='Relationship_counseling'>Relationship_counseling</a> (2) | <a href='https://en.wikipedia.org/wiki/Category:Family_therapy' title='Family_therapy'>Family_therapy</a> (2) | <a href='https://en.wikipedia.org/wiki/Category:Curves' title='Curves'>Curves</a> (2) | <a href='https://en.wikipedia.org/wiki/Category:Accuracy_and_precision' title='Accuracy_and_precision'>Accuracy_and_precision</a> (2) | <a href='https://en.wikipedia.org/wiki/Category:Measurement' title='Measurement'>Measurement</a> (2) | <a 
href='https://en.wikipedia.org/wiki/Category:Metrology' title='Metrology'>Metrology</a> (2) | <a href='https://en.wikipedia.org/wiki/Category:Information_science' title='Information_science'>Information_science</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Robotics' title='Robotics'>Robotics</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Quantification' title='Quantification'>Quantification</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Population_ecology' title='Population_ecology'>Population_ecology</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Special_functions' title='Special_functions'>Special_functions</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Differential_equations' title='Differential_equations'>Differential_equations</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Papermaking' title='Papermaking'>Papermaking</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Packaging_materials' title='Packaging_materials'>Packaging_materials</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Printing_materials' title='Printing_materials'>Printing_materials</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Volume' title='Volume'>Volume</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Mathematical_constants' title='Mathematical_constants'>Mathematical_constants</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Transcendental_numbers' title='Transcendental_numbers'>Transcendental_numbers</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Real_transcendental_numbers' title='Real_transcendental_numbers'>Real_transcendental_numbers</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Integers' title='Integers'>Integers</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Combinatorics' title='Combinatorics'>Combinatorics</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Radioactivity' title='Radioactivity'>Radioactivity</a> (1) | <a 
href='https://en.wikipedia.org/wiki/Category:Conjugate_prior_distributions' title='Conjugate_prior_distributions'>Conjugate_prior_distributions</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Continuous_distributions' title='Continuous_distributions'>Continuous_distributions</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Location-scale_family_probability_distributions' title='Location-scale_family_probability_distributions'>Location-scale_family_probability_distributions</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Stable_distributions' title='Stable_distributions'>Stable_distributions</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Normal_distribution' title='Normal_distribution'>Normal_distribution</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Complex_systems_theory' title='Complex_systems_theory'>Complex_systems_theory</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Covariance_and_correlation' title='Covariance_and_correlation'>Covariance_and_correlation</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Dimensionless_numbers' title='Dimensionless_numbers'>Dimensionless_numbers</a> (1)\n",
       "    </div>\n",
       "    </div>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "display(get_html(text, processor))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "text=\"\"\"Deep Learning is a subfield of machine learning concerned with algorithms inspired by the structure and function of the brain called artificial neural networks.\n",
    "If you are just starting out in the field of deep learning or you had some experience with neural networks some time ago, you may be confused. I know I was confused initially and so were many of my colleagues and friends who learned and used neural networks in the 1990s and early 2000s.\n",
    "The leaders and experts in the field have ideas of what deep learning is and these specific and nuanced perspectives shed a lot of light on what deep learning is all about.\n",
    "In this post, you will discover exactly what deep learning is by hearing from a range of experts and leaders in the field.\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "    <div>\n",
       "        <h3>Tagged document:</h3>\n",
       "        <a href='https://en.wikipedia.org/wiki/Deep_learning' title='Deep_learning'>Deep Learning</a> <a href='https://en.wikipedia.org/wiki/Is-a' title='Is-a'>is a</a> subfield of <a href='https://en.wikipedia.org/wiki/Machine_learning' title='Machine_learning'>machine learning</a> concerned with <a href='https://en.wikipedia.org/wiki/Algorithm' title='Algorithm'>algorithms</a> inspired by the structure and function of <a href='https://en.wikipedia.org/wiki/Brain' title='Brain'>the brain</a> called <a href='https://en.wikipedia.org/wiki/Artificial_neural_network' title='Artificial_neural_network'>artificial neural networks</a>.<br/>If you are just starting out in the field of <a href='https://en.wikipedia.org/wiki/Deep_learning' title='Deep_learning'>deep learning</a> or you <a href='https://en.wikipedia.org/wiki/HAD_CCD' title='HAD_CCD'>had</a> some experience with <a href='https://en.wikipedia.org/wiki/Artificial_neural_network' title='Artificial_neural_network'>neural networks</a> some <a href='https://en.wikipedia.org/wiki/Time_(magazine)' title='Time_(magazine)'>time</a> ago, you may be <a href='https://en.wikipedia.org/wiki/Confusion' title='Confusion'>confused</a>. 
I <a href='https://en.wikipedia.org/wiki/Knowledge' title='Knowledge'>know</a> I was <a href='https://en.wikipedia.org/wiki/Confusion' title='Confusion'>confused</a> initially and so were many of my colleagues and friends who <a href='https://en.wikipedia.org/wiki/Learning' title='Learning'>learned</a> and used <a href='https://en.wikipedia.org/wiki/Artificial_neural_network' title='Artificial_neural_network'>neural networks</a> in the 1990s and early 2000s.<br/>The leaders and <a href='https://en.wikipedia.org/wiki/Expert' title='Expert'>experts</a> in the field have <a href='https://en.wikipedia.org/wiki/Idea' title='Idea'>ideas</a> of what <a href='https://en.wikipedia.org/wiki/Deep_learning' title='Deep_learning'>deep learning</a> is and these specific and nuanced perspectives shed a lot of <a href='https://en.wikipedia.org/wiki/Category:Light' title='Category:Light'>light</a> on what <a href='https://en.wikipedia.org/wiki/Deep_learning' title='Deep_learning'>deep learning</a> is all about.<br/>In this post, you will discover exactly what <a href='https://en.wikipedia.org/wiki/Deep_learning' title='Deep_learning'>deep learning</a> is by <a href='https://en.wikipedia.org/wiki/Category:Hearing' title='Category:Hearing'>hearing</a> from a range of <a href='https://en.wikipedia.org/wiki/Expert' title='Expert'>experts</a>\n",
       "    </div>\n",
       "    <div>\n",
       "        <h3>Predicted categories:</h3>\n",
       "        <a href='https://en.wikipedia.org/wiki/Category:Deep_learning' title='Deep_learning'>Deep_learning</a> (5) | <a href='https://en.wikipedia.org/wiki/Category:Computational_statistics' title='Computational_statistics'>Computational_statistics</a> (3) | <a href='https://en.wikipedia.org/wiki/Category:Computational_neuroscience' title='Computational_neuroscience'>Computational_neuroscience</a> (3) | <a href='https://en.wikipedia.org/wiki/Category:Artificial_neural_networks' title='Artificial_neural_networks'>Artificial_neural_networks</a> (3) | <a href='https://en.wikipedia.org/wiki/Category:Classification_algorithms' title='Classification_algorithms'>Classification_algorithms</a> (3) | <a href='https://en.wikipedia.org/wiki/Category:Mathematical_psychology' title='Mathematical_psychology'>Mathematical_psychology</a> (3) | <a href='https://en.wikipedia.org/wiki/Category:Knowledge' title='Knowledge'>Knowledge</a> (3) | <a href='https://en.wikipedia.org/wiki/Category:Learning' title='Learning'>Learning</a> (2) | <a href='https://en.wikipedia.org/wiki/Category:Emotions' title='Emotions'>Emotions</a> (2) | <a href='https://en.wikipedia.org/wiki/Category:Skills' title='Skills'>Skills</a> (2) | <a href='https://en.wikipedia.org/wiki/Category:Knowledge_representation' title='Knowledge_representation'>Knowledge_representation</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Object-oriented_programming' title='Object-oriented_programming'>Object-oriented_programming</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Abstraction' title='Abstraction'>Abstraction</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Machine_learning' title='Machine_learning'>Machine_learning</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Cybernetics' title='Cybernetics'>Cybernetics</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Algorithms' title='Algorithms'>Algorithms</a> (1) | <a 
href='https://en.wikipedia.org/wiki/Category:Mathematical_logic' title='Mathematical_logic'>Mathematical_logic</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Theoretical_computer_science' title='Theoretical_computer_science'>Theoretical_computer_science</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Brain' title='Brain'>Brain</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Sony_image_sensors' title='Sony_image_sensors'>Sony_image_sensors</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:English-language_magazines' title='English-language_magazines'>English-language_magazines</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Intelligence' title='Intelligence'>Intelligence</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Neuropsychological_assessment' title='Neuropsychological_assessment'>Neuropsychological_assessment</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Creativity' title='Creativity'>Creativity</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Concepts' title='Concepts'>Concepts</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Ontology' title='Ontology'>Ontology</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Light' title='Light'>Light</a> (1) | <a href='https://en.wikipedia.org/wiki/Category:Hearing' title='Hearing'>Hearing</a> (1)\n",
       "    </div>\n",
       "    </div>"
      ],
      "text/plain": [
       "<IPython.core.display.HTML object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "display(get_html(text, processor))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "181070"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(page2cats)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Interactive usage"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "from ipywidgets import interact_manual, widgets, Layout"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "5cd74bf42aa64e469f7c5df70eec3f39",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "interactive(children=(Textarea(value='Deep Learning is a subfield of machine learning concerned with algorithm…"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Interactive demo: the text area is pre-filled with the sample document\n",
    "# defined above, so the first click on the run button already has input.\n",
    "text_area_widget = widgets.Textarea(\n",
    "    value=text,\n",
    "    placeholder=\"Type your text here\",\n",
    "    description='String:',\n",
    "    disabled=False,\n",
    "    rows=10,\n",
    "    layout=Layout(width=\"90%\"),\n",
    ")\n",
    "# interact_manual (rather than interact) only calls the function when the\n",
    "# button is pressed, so tagging is not re-run while the user is typing.\n",
    "# The trailing ';' suppresses the repr of the returned function object.\n",
    "interact_manual(lambda text: get_html(text, processor), text=text_area_widget);"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}