.. _quick:

Quick Start
===========

.. code-block:: python

    # Conceptually, TextWiser is composed of an Embedding, potentially with a pretrained model,
    # that can be chained into zero or more Transformations
    from textwiser import TextWiser, Embedding, Transformation, WordOptions, PoolOptions

    # Data
    documents = ["Some document", "More documents. Including multi-sentence documents."]

    # Model: TF-IDF; the `min_df` parameter is passed through to scikit-learn automatically
    emb = TextWiser(Embedding.TfIdf(min_df=1))

    # Model: TF-IDF followed by NMF and SVD transformations
    emb = TextWiser(Embedding.TfIdf(min_df=1), [Transformation.NMF(n_components=30), Transformation.SVD(n_components=10)])

    # Model: Word2Vec with no pretraining that learns from the input data
    emb = TextWiser(Embedding.Word(word_option=WordOptions.word2vec, pretrained=None), Transformation.Pool(pool_option=PoolOptions.min))

    # Model: BERT with the pretrained bert-base-uncased embedding
    emb = TextWiser(Embedding.Word(word_option=WordOptions.bert), Transformation.Pool(pool_option=PoolOptions.first))

    # Features
    vecs = emb.fit_transform(documents)
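Since ``TextWiser`` follows the scikit-learn ``fit``/``transform`` convention, the resulting feature matrix can feed directly into downstream estimators. The snippet below is a minimal sketch of such a pipeline; it assumes ``vecs`` comes back as a 2D array with one row per document, and the ``labels`` list and ``LogisticRegression`` classifier are illustrative placeholders, not part of TextWiser.

.. code-block:: python

    # Sketch only: plug the TextWiser features into a downstream classifier.
    # Assumes `vecs` is a 2D array with one row per input document; the labels
    # and the choice of LogisticRegression are hypothetical examples.
    from sklearn.linear_model import LogisticRegression

    print(vecs.shape)  # (number of documents, embedding dimension)

    labels = [0, 1]  # hypothetical labels, one per document
    clf = LogisticRegression().fit(vecs, labels)
    predictions = clf.predict(vecs)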