
twarc.Client2

Support for the Twitter v2 API.

Twarc2

A client for the Twitter v2 API.

__init__(self, consumer_key=None, consumer_secret=None, access_token=None, access_token_secret=None, bearer_token=None, connection_errors=0, http_errors=0, metadata=True) special

Instantiate a Twarc2 instance to talk to the Twitter V2+ API.

The client can use either App or User authentication, but only one at a time. Whether app auth or user auth is used depends on which credentials are provided on initialisation:

  1. If a bearer_token is passed, app auth is always used.
  2. If a consumer_key and consumer_secret are passed without an access_token and access_token_secret, app auth is used.
  3. If consumer_key, consumer_secret, access_token and access_token_secret are all passed, then user authentication is used instead.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| consumer_key | str | The API key. | None |
| consumer_secret | str | The API secret. | None |
| access_token | str | The Access Token. | None |
| access_token_secret | str | The Access Token Secret. | None |
| bearer_token | str | Bearer Token, can be generated from API keys. | None |
| connection_errors | int | Number of retries for GETs. | 0 |
| http_errors | int | Number of retries for sample stream. | 0 |
| metadata | bool | Append __twarc metadata to results. | True |

Source code in twarc/client2.py
def __init__(
    self,
    consumer_key=None,
    consumer_secret=None,
    access_token=None,
    access_token_secret=None,
    bearer_token=None,
    connection_errors=0,
    http_errors=0,
    metadata=True,
):
    """
    Instantiate a Twarc2 instance to talk to the Twitter V2+ API.

    The client can use either App or User authentication, but only one at a
    time. Whether app auth or user auth is used depends on which credentials
    are provided on initialisation:

    1. If a `bearer_token` is passed, app auth is always used.
    2. If a `consumer_key` and `consumer_secret` are passed without an
    `access_token` and `access_token_secret`, app auth is used.
    3. If `consumer_key`, `consumer_secret`, `access_token` and
    `access_token_secret` are all passed, then user authentication
    is used instead.

    Args:
        consumer_key (str):
            The API key.
        consumer_secret (str):
            The API secret.
        access_token (str):
            The Access Token
        access_token_secret (str):
            The Access Token Secret
        bearer_token (str):
            Bearer Token, can be generated from API keys.
        connection_errors (int):
            Number of retries for GETs
        http_errors (int):
            Number of retries for sample stream.
        metadata (bool):
            Append `__twarc` metadata to results.
    """
    self.api_version = "2"
    self.connection_errors = connection_errors
    self.http_errors = http_errors
    self.metadata = metadata
    self.bearer_token = None

    if bearer_token:
        self.bearer_token = bearer_token
        self.auth_type = "application"

    elif consumer_key and consumer_secret:
        if access_token and access_token_secret:
            self.consumer_key = consumer_key
            self.consumer_secret = consumer_secret
            self.access_token = access_token
            self.access_token_secret = access_token_secret
            self.auth_type = "user"

        else:
            self.consumer_key = consumer_key
            self.consumer_secret = consumer_secret
            self.auth_type = "application"

    else:
        raise ValueError(
            "Must pass either a bearer_token or consumer/access_token keys and secrets"
        )

    self.client = None
    self.last_response = None

    self.connect()
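
For reference, a minimal sketch of the three credential combinations described above (all key strings here are placeholders):

```python
from twarc import Twarc2

# App auth: a bearer token alone is enough.
t = Twarc2(bearer_token="AAAA...")

# App auth: consumer key and secret only; a bearer token is
# fetched via OAuth2 when the client connects.
t = Twarc2(consumer_key="xxx", consumer_secret="yyy")

# User auth: all four credentials supplied.
t = Twarc2(
    consumer_key="xxx",
    consumer_secret="yyy",
    access_token="zzz",
    access_token_secret="www",
)
```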

add_stream_rules(self, rules)

Adds new rules to the filter stream.

Calls POST /2/tweets/search/stream/rules

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| rules | list[dict] | A list of rules to add. | required |

Returns:

| Type | Description |
| --- | --- |
| dict | JSON Response from Twitter API. |

Source code in twarc/client2.py
@requires_app_auth
def add_stream_rules(self, rules):
    """
    Adds new rules to the filter stream.

    Calls [POST /2/tweets/search/stream/rules](https://developer.twitter.com/en/docs/twitter-api/tweets/filtered-stream/api-reference/post-tweets-search-stream-rules)

    Args:
        rules (list[dict]): A list of rules to add.

    Returns:
        dict: JSON Response from Twitter API.
    """
    url = "https://api.twitter.com/2/tweets/search/stream/rules"
    return self.post(url, {"add": rules}).json()

connect(self)

Sets up the HTTP session to talk to Twitter. If one is active it is closed and another one is opened.

Source code in twarc/client2.py
def connect(self):
    """
    Sets up the HTTP session to talk to Twitter. If one is active it is
    closed and another one is opened.
    """
    if self.last_response:
        self.last_response.close()

    if self.client:
        self.client.close()

    if self.auth_type == "application" and self.bearer_token:
        log.info("creating HTTP session headers for app auth.")
        auth = f"Bearer {self.bearer_token}"
        log.debug("authorization: %s", auth)
        self.client = requests.Session()
        self.client.headers.update({"Authorization": auth})
    elif self.auth_type == "application":
        log.info("creating app auth client via OAuth2")
        log.debug("client_id: %s", self.consumer_key)
        log.debug("client_secret: %s", self.consumer_secret)
        client = BackendApplicationClient(client_id=self.consumer_key)
        self.client = OAuth2Session(client=client)
        self.client.fetch_token(
            token_url="https://api.twitter.com/oauth2/token",
            client_id=self.consumer_key,
            client_secret=self.consumer_secret,
        )
    else:
        log.info("creating user auth client")
        log.debug("client_id: %s", self.consumer_key)
        log.debug("client_secret: %s", self.consumer_secret)
        log.debug("resource_owner_key: %s", self.access_token)
        log.debug("resource_owner_secret: %s", self.access_token_secret)
        self.client = OAuth1Session(
            client_key=self.consumer_key,
            client_secret=self.consumer_secret,
            resource_owner_key=self.access_token,
            resource_owner_secret=self.access_token_secret,
        )

delete_stream_rule_ids(self, rule_ids)

Deletes rules from the filter stream.

Calls POST /2/tweets/search/stream/rules

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| rule_ids | list[int] | A list of rule ids to delete. | required |

Returns:

| Type | Description |
| --- | --- |
| dict | JSON Response from Twitter API. |

Source code in twarc/client2.py
@requires_app_auth
def delete_stream_rule_ids(self, rule_ids):
    """
    Deletes rules from the filter stream.

    Calls [POST /2/tweets/search/stream/rules](https://developer.twitter.com/en/docs/twitter-api/tweets/filtered-stream/api-reference/post-tweets-search-stream-rules)

    Args:
        rule_ids (list[int]): A list of rule ids to delete.

    Returns:
        dict: JSON Response from Twitter API.
    """
    url = "https://api.twitter.com/2/tweets/search/stream/rules"
    return self.post(url, {"delete": {"ids": rule_ids}}).json()

followers(self, user)

Retrieve the user profiles of accounts following the given user.

Calls GET /2/users/:id/followers

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| user | int | ID of the user. | required |

Returns:

| Type | Description |
| --- | --- |
| generator[dict] | A generator, dict for each page of results. |

Source code in twarc/client2.py
def followers(self, user):
    """
    Retrieve the user profiles of accounts following the given user.

    Calls [GET /2/users/:id/followers](https://developer.twitter.com/en/docs/twitter-api/users/follows/api-reference/get-users-id-followers)

    Args:
        user (int): ID of the user.

    Returns:
        generator[dict]: A generator, dict for each page of results.
    """
    user_id = self._ensure_user_id(user)
    params = expansions.USER_EVERYTHING.copy()
    params["max_results"] = 1000
    url = f"https://api.twitter.com/2/users/{user_id}/followers"
    return self.get_paginated(url, params=params)
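
A sketch of iterating follower pages for a user ID (the ID here is a placeholder), assuming `t` is a Twarc2 instance:

```python
for page in t.followers(12):
    for profile in page.get("data", []):
        print(profile["id"], profile["username"])
```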

following(self, user)

Retrieve the user profiles of accounts followed by the given user.

Calls GET /2/users/:id/following

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| user | int | ID of the user. | required |

Returns:

| Type | Description |
| --- | --- |
| generator[dict] | A generator, dict for each page of results. |

Source code in twarc/client2.py
def following(self, user):
    """
    Retrieve the user profiles of accounts followed by the given user.

    Calls [GET /2/users/:id/following](https://developer.twitter.com/en/docs/twitter-api/users/follows/api-reference/get-users-id-following)

    Args:
        user (int): ID of the user.

    Returns:
        generator[dict]: A generator, dict for each page of results.
    """
    user_id = self._ensure_user_id(user)
    params = expansions.USER_EVERYTHING.copy()
    params["max_results"] = 1000
    url = f"https://api.twitter.com/2/users/{user_id}/following"
    return self.get_paginated(url, params=params)

get(self, *args, **kwargs)

Make a GET request to a specified URL.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| *args | | Variable length argument list. | () |
| **kwargs | | Arbitrary keyword arguments. | {} |

Returns:

| Type | Description |
| --- | --- |
| requests.Response | Response from Twitter API. |

Source code in twarc/client2.py
@rate_limit
@catch_conn_reset
@catch_timeout
@catch_gzip_errors
def get(self, *args, **kwargs):
    """
    Make a GET request to a specified URL.

    Args:
        *args: Variable length argument list.
        **kwargs: Arbitrary keyword arguments.

    Returns:
        requests.Response: Response from Twitter API.
    """

    # Pass allow 404 to not retry on 404
    allow_404 = kwargs.pop("allow_404", False)
    connection_error_count = kwargs.pop("connection_error_count", 0)
    try:
        log.info("getting %s %s", args, kwargs)
        r = self.last_response = self.client.get(
            *args, timeout=(3.05, 31), **kwargs
        )
        # this has been noticed, believe it or not
        # https://github.com/edsu/twarc/issues/75
        if r.status_code == 404 and not allow_404:
            log.warning("404 from Twitter API! trying again")
            time.sleep(1)
            r = self.get(*args, **kwargs)
        return r
    except (ssl.SSLError, ConnectionError, ProtocolError) as e:
        connection_error_count += 1
        log.error("caught connection error %s on %s try", e, connection_error_count)
        if (
            self.connection_errors
            and connection_error_count == self.connection_errors
        ):
            log.error("received too many connection errors")
            raise e
        else:
            self.connect()
            kwargs["connection_error_count"] = connection_error_count
            kwargs["allow_404"] = allow_404
            return self.get(*args, **kwargs)
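
For a one-off request, `get` can also be called directly with a v2 endpoint URL; a sketch (the tweet ID is a placeholder):

```python
resp = t.get(
    "https://api.twitter.com/2/tweets",
    params={"ids": "1138505981460193280"},
)
print(resp.status_code)
print(resp.json())
```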

get_paginated(self, *args, **kwargs)

A wrapper around the get method that handles Twitter token based pagination.

Yields one page (one API response) at a time.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| *args | | Variable length argument list. | () |
| **kwargs | | Arbitrary keyword arguments. | {} |

Returns:

| Type | Description |
| --- | --- |
| generator[dict] | A generator, dict for each page of results. |

Source code in twarc/client2.py
def get_paginated(self, *args, **kwargs):
    """
    A wrapper around the `get` method that handles Twitter token based
    pagination.

    Yields one page (one API response) at a time.

    Args:
        *args: Variable length argument list.
        **kwargs: Arbitrary keyword arguments.

    Returns:
        generator[dict]: A generator, dict for each page of results.
    """

    resp = self.get(*args, **kwargs)
    page = resp.json()

    url = args[0]

    if self.metadata:
        page = _append_metadata(page, resp.url)

    yield page

    endings = ["mentions", "tweets", "following", "followers"]

    # The search endpoints only take a next_token, but the timeline
    # endpoints take a pagination_token instead - this is a bit of a hack,
    # but check the URL ending to see which we should use.
    if any(url.endswith(end) for end in endings):
        token_param = "pagination_token"
    else:
        token_param = "next_token"

    while "meta" in page and "next_token" in page["meta"]:
        if "params" in kwargs:
            kwargs["params"][token_param] = page["meta"]["next_token"]
        else:
            kwargs["params"] = {token_param: page["meta"]["next_token"]}

        resp = self.get(*args, **kwargs)
        page = resp.json()

        if self.metadata:
            page = _append_metadata(page, resp.url)

        yield page
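
A sketch of driving pagination manually against the followers endpoint (the user ID is a placeholder); each yielded page is one parsed JSON response, with the next_token plumbing handled for you:

```python
url = "https://api.twitter.com/2/users/12/followers"
for page in t.get_paginated(url, params={"max_results": 1000}):
    print(len(page.get("data", [])), "profiles in this page")
```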

get_stream_rules(self)

Returns a list of rules for the filter stream.

Calls GET /2/tweets/search/stream/rules

Returns:

| Type | Description |
| --- | --- |
| dict | JSON Response from Twitter API with a list of defined rules. |

Source code in twarc/client2.py
@requires_app_auth
def get_stream_rules(self):
    """
    Returns a list of rules for the filter stream.

    Calls [GET /2/tweets/search/stream/rules](https://developer.twitter.com/en/docs/twitter-api/tweets/filtered-stream/api-reference/get-tweets-search-stream-rules)

    Returns:
        dict: JSON Response from Twitter API with a list of defined rules.
    """
    url = "https://api.twitter.com/2/tweets/search/stream/rules"
    return self.get(url).json()

mentions(self, user, since_id=None, until_id=None, start_time=None, end_time=None, exclude_retweets=False, exclude_replies=False)

Retrieve up to the 800 most recent tweets mentioning the given user.

Calls GET /2/users/:id/mentions

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| user | int | ID of the user. | required |
| since_id | int | Results with a Tweet ID greater than (newer than) the specified ID. | None |
| until_id | int | Results with a Tweet ID less than (older than) the specified ID. | None |
| start_time | datetime | Oldest UTC timestamp from which the Tweets will be provided. | None |
| end_time | datetime | Newest UTC timestamp from which the Tweets will be provided. | None |
| exclude_retweets | bool | Remove retweets from timeline results. | False |
| exclude_replies | bool | Remove replies from timeline results. | False |

Returns:

| Type | Description |
| --- | --- |
| generator[dict] | A generator, dict for each page of results. |

Source code in twarc/client2.py
def mentions(
    self,
    user,
    since_id=None,
    until_id=None,
    start_time=None,
    end_time=None,
    exclude_retweets=False,
    exclude_replies=False,
):
    """
    Retrieve up to the 800 most recent tweets mentioning the given user.

    Calls [GET /2/users/:id/mentions](https://developer.twitter.com/en/docs/twitter-api/tweets/timelines/api-reference/get-users-id-mentions)

    Args:
        user (int): ID of the user.
        since_id (int): results with a Tweet ID greater than (newer than) the specified ID
        until_id (int): results with a Tweet ID less than (older than) the specified ID
        start_time (datetime): oldest UTC timestamp from which the Tweets will be provided
        end_time (datetime): newest UTC timestamp from which the Tweets will be provided
        exclude_retweets (boolean): remove retweets from timeline results
        exclude_replies (boolean): remove replies from timeline results

    Returns:
        generator[dict]: A generator, dict for each page of results.
    """
    user_id = self._ensure_user_id(user)
    return self._timeline(
        user_id,
        "mentions",
        since_id,
        until_id,
        start_time,
        end_time,
        exclude_retweets,
        exclude_replies,
    )
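
A sketch of collecting mentions newer than a known tweet, assuming `t` is a Twarc2 instance (both IDs are placeholders):

```python
for page in t.mentions(12, since_id=1138505981460193280):
    for tweet in page.get("data", []):
        print(tweet["id"], tweet["text"])
```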

post(self, url, json_data)

Make a POST request to the specified URL.

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| url | str | URL to make a POST request to. | required |
| json_data | dict | JSON data to send. | required |

Returns:

| Type | Description |
| --- | --- |
| requests.Response | Response from Twitter API. |

Source code in twarc/client2.py
@rate_limit
def post(self, url, json_data):
    """
    Make a POST request to the specified URL.

    Args:
        url (str): URL to make a POST request
        json_data (dict): JSON data to send.

    Returns:
        requests.Response: Response from Twitter API.
    """
    if not self.client:
        self.connect()
    return self.client.post(url, json=json_data)

sample(self, event=None, record_keepalive=False)

Returns a sample of all publicly posted tweets.

The sample is based on slices of each second, not truly randomised. The same tweets are returned for all users of this endpoint.

If a threading.Event is provided and the event is set, the sample will be interrupted. This can be used for coordination with other programs.

Calls GET /2/tweets/sample/stream

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| event | threading.Event | Manages a flag to stop the process. | None |
| record_keepalive | bool | Whether to output keep-alive events. | False |

Returns:

| Type | Description |
| --- | --- |
| generator[dict] | A generator, dict for each tweet. |

Source code in twarc/client2.py
@requires_app_auth
def sample(self, event=None, record_keepalive=False):
    """
    Returns a sample of all publicly posted tweets.

    The sample is based on slices of each second, not truly randomised. The
    same tweets are returned for all users of this endpoint.

    If a `threading.Event` is provided and the event is set, the
    sample will be interrupted. This can be used for coordination with other
    programs.

    Calls [GET /2/tweets/sample/stream](https://developer.twitter.com/en/docs/twitter-api/tweets/sampled-stream/api-reference/get-tweets-sample-stream)

    Args:
        event (threading.Event): Manages a flag to stop the process.
        record_keepalive (bool): whether to output keep-alive events.

    Returns:
        generator[dict]: a generator, dict for each tweet.
    """
    url = "https://api.twitter.com/2/tweets/sample/stream"
    errors = 0

    while True:
        try:
            log.info("Connecting to V2 sample stream")
            resp = self.get(url, params=expansions.EVERYTHING.copy(), stream=True)
            errors = 0
            for line in resp.iter_lines(chunk_size=512):

                # quit & close the stream if the event is set
                if event and event.is_set():
                    log.info("stopping sample")
                    resp.close()
                    return

                # return the JSON data w/ optional keep-alive
                if not line:
                    log.info("keep-alive")
                    if record_keepalive:
                        yield "keep-alive"
                    continue
                else:
                    data = json.loads(line.decode())
                    if self.metadata:
                        data = _append_metadata(data, resp.url)
                    yield data

                    # Check for an operational disconnect error in the response
                    if data.get("errors", []):
                        for error in data["errors"]:
                            if (
                                error.get("disconnect_type")
                                == "OperationalDisconnect"
                            ):
                                log.info(
                                    "Received operational disconnect message: "
                                    "This stream has fallen too far behind in "
                                    "processing tweets. Some data may have been "
                                    "lost."
                                )
                                # Sleep briefly, then break this get call and
                                # attempt to reconnect.
                                time.sleep(5)
                                break

        except requests.exceptions.HTTPError as e:
            errors += 1
            log.error("caught http error %s on %s try", e, errors)
            if self.http_errors and errors == self.http_errors:
                log.warning("too many errors")
                raise e
            if e.response.status_code == 420:
                if interruptible_sleep(errors * 60, event):
                    log.info("stopping filter")
                    return
            else:
                if interruptible_sleep(errors * 5, event):
                    log.info("stopping filter")
                    return
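
A sketch of consuming the sample stream until another thread sets the event, assuming `t` is an app-auth Twarc2 instance:

```python
import threading

stop = threading.Event()

# Something else (a signal handler, a timer thread, etc.) can call
# stop.set() to end the stream cleanly.
for item in t.sample(event=stop):
    if "data" in item:
        print(item["data"]["id"])
```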

search_all(self, query, since_id=None, until_id=None, start_time=None, end_time=None, max_results=500)

Search Twitter for the given query in the full archive, using the /search/all endpoint (Requires Academic Access).

Calls GET /2/tweets/search/all

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| query | str | The query string to be passed directly to the Twitter API. | required |
| since_id | int | Return all tweets since this tweet_id. | None |
| until_id | int | Return all tweets up to this tweet_id. | None |
| start_time | datetime | Return all tweets after this time (UTC datetime). | None |
| end_time | datetime | Return all tweets before this time (UTC datetime). | None |
| max_results | int | The maximum number of results per request. Max is 500. | 500 |

Returns:

| Type | Description |
| --- | --- |
| generator[dict] | A generator, dict for each paginated response. |

Source code in twarc/client2.py
@requires_app_auth
def search_all(
    self,
    query,
    since_id=None,
    until_id=None,
    start_time=None,
    end_time=None,
    max_results=500,
):
    """
    Search Twitter for the given query in the full archive,
    using the `/search/all` endpoint (Requires Academic Access).

    Calls [GET /2/tweets/search/all](https://developer.twitter.com/en/docs/twitter-api/tweets/search/api-reference/get-tweets-search-all)

    Args:
        query (str):
            The query string to be passed directly to the Twitter API.
        since_id (int):
            Return all tweets since this tweet_id.
        until_id (int):
            Return all tweets up to this tweet_id.
        start_time (datetime):
            Return all tweets after this time (UTC datetime).
        end_time (datetime):
            Return all tweets before this time (UTC datetime).
        max_results (int):
            The maximum number of results per request. Max is 500.

    Returns:
        generator[dict]: a generator, dict for each paginated response.
    """
    url = "https://api.twitter.com/2/tweets/search/all"

    # start time defaults to the beginning of Twitter to override the
    # default of the last month. Only do this if start_time is not already
    # specified and since_id isn't being used
    if start_time is None and since_id is None:
        start_time = TWITTER_EPOCH

    return self._search(
        url,
        query,
        since_id,
        until_id,
        start_time,
        end_time,
        max_results,
        sleep_between=1.05,
    )
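
A sketch of a full-archive search bounded to a single month, assuming `t` uses app auth with Academic Access:

```python
import datetime

start = datetime.datetime(2020, 3, 1, tzinfo=datetime.timezone.utc)
end = datetime.datetime(2020, 4, 1, tzinfo=datetime.timezone.utc)

for page in t.search_all("snow lang:en", start_time=start, end_time=end):
    for tweet in page.get("data", []):
        print(tweet["id"])
```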

search_recent(self, query, since_id=None, until_id=None, start_time=None, end_time=None, max_results=100)

Search Twitter for the given query in the last seven days, using the /search/recent endpoint.

Calls GET /2/tweets/search/recent

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| query | str | The query string to be passed directly to the Twitter API. | required |
| since_id | int | Return all tweets since this tweet_id. | None |
| until_id | int | Return all tweets up to this tweet_id. | None |
| start_time | datetime | Return all tweets after this time (UTC datetime). | None |
| end_time | datetime | Return all tweets before this time (UTC datetime). | None |
| max_results | int | The maximum number of results per request. Max is 100. | 100 |

Returns:

| Type | Description |
| --- | --- |
| generator[dict] | A generator, dict for each paginated response. |

Source code in twarc/client2.py
def search_recent(
    self,
    query,
    since_id=None,
    until_id=None,
    start_time=None,
    end_time=None,
    max_results=100,
):
    """
    Search Twitter for the given query in the last seven days,
    using the `/search/recent` endpoint.

    Calls [GET /2/tweets/search/recent](https://developer.twitter.com/en/docs/twitter-api/tweets/search/api-reference/get-tweets-search-recent)

    Args:
        query (str):
            The query string to be passed directly to the Twitter API.
        since_id (int):
            Return all tweets since this tweet_id.
        until_id (int):
            Return all tweets up to this tweet_id.
        start_time (datetime):
            Return all tweets after this time (UTC datetime).
        end_time (datetime):
            Return all tweets before this time (UTC datetime).
        max_results (int):
            The maximum number of results per request. Max is 100.

    Returns:
        generator[dict]: a generator, dict for each paginated response.
    """
    url = "https://api.twitter.com/2/tweets/search/recent"
    return self._search(
        url, query, since_id, until_id, start_time, end_time, max_results
    )
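
A sketch of a recent search, assuming `t` is a Twarc2 instance; the query uses Twitter's standard search operators:

```python
for page in t.search_recent("#python -is:retweet"):
    for tweet in page.get("data", []):
        print(tweet["text"])
```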

stream(self, event=None, record_keep_alives=False)

Returns a stream of tweets matching the defined rules.

Rules can be added or removed out-of-band, without disconnecting. Tweet results will contain metadata about the rule that matched them.

If event is set with a threading.Event object, the stream will be interrupted. This can be used for coordination with other programs.

Calls GET /2/tweets/search/stream

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| event | threading.Event | Manages a flag to stop the process. | None |
| record_keep_alives | bool | Whether to output keep-alive events. | False |

Returns:

| Type | Description |
| --- | --- |
| generator[dict] | A generator, dict for each tweet. |

Source code in twarc/client2.py
@requires_app_auth
def stream(self, event=None, record_keep_alives=False):
    """
    Returns a stream of tweets matching the defined rules.

    Rules can be added or removed out-of-band, without disconnecting.
    Tweet results will contain metadata about the rule that matched it.

    If event is set with a threading.Event object, the stream will be
    interrupted. This can be used for coordination with other programs.

    Calls [GET /2/tweets/search/stream](https://developer.twitter.com/en/docs/twitter-api/tweets/filtered-stream/api-reference/get-tweets-search-stream)

    Args:
        event (threading.Event): Manages a flag to stop the process.
        record_keep_alives (bool): whether to output keep-alive events.

    Returns:
        generator[dict]: a generator, dict for each tweet.
    """
    url = "https://api.twitter.com/2/tweets/search/stream"
    params = expansions.EVERYTHING.copy()
    resp = self.get(url, params=params, stream=True)
    for line in resp.iter_lines():

        # quit & close the stream if the event is set
        if event and event.is_set():
            log.info("stopping filter")
            resp.close()
            return

        if line == b"":
            log.info("keep-alive")
            if record_keep_alives:
                yield "keep-alive"
        else:
            data = json.loads(line.decode())
            if self.metadata:
                data = _append_metadata(data, resp.url)

            yield data
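
A sketch that defines a rule and then consumes matching tweets until the event is set, assuming `t` is an app-auth Twarc2 instance:

```python
import threading

stop = threading.Event()
t.add_stream_rules([{"value": "snow", "tag": "snow"}])

for item in t.stream(event=stop):
    if "data" in item:
        # Each result carries the rule(s) that matched it.
        print(item["data"]["text"], item["matching_rules"])
```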

timeline(self, user, since_id=None, until_id=None, start_time=None, end_time=None, exclude_retweets=False, exclude_replies=False)

Retrieve up to the 3200 most recent tweets made by the given user.

Calls GET /2/users/:id/tweets

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| user | int | ID of the user. | required |
| since_id | int | Results with a Tweet ID greater than (newer than) the specified ID. | None |
| until_id | int | Results with a Tweet ID less than (older than) the specified ID. | None |
| start_time | datetime | Oldest UTC timestamp from which the Tweets will be provided. | None |
| end_time | datetime | Newest UTC timestamp from which the Tweets will be provided. | None |
| exclude_retweets | bool | Remove retweets from timeline results. | False |
| exclude_replies | bool | Remove replies from timeline results. | False |

Returns:

| Type | Description |
| --- | --- |
| generator[dict] | A generator, dict for each page of results. |

Source code in twarc/client2.py
def timeline(
    self,
    user,
    since_id=None,
    until_id=None,
    start_time=None,
    end_time=None,
    exclude_retweets=False,
    exclude_replies=False,
):
    """
    Retrieve up to the 3200 most recent tweets made by the given user.

    Calls [GET /2/users/:id/tweets](https://developer.twitter.com/en/docs/twitter-api/tweets/timelines/api-reference/get-users-id-tweets)

    Args:
        user (int): ID of the user.
        since_id (int): results with a Tweet ID greater than (newer than) the specified ID
        until_id (int): results with a Tweet ID less than (older than) the specified ID
        start_time (datetime): oldest UTC timestamp from which the Tweets will be provided
        end_time (datetime): newest UTC timestamp from which the Tweets will be provided
        exclude_retweets (boolean): remove retweets from timeline results
        exclude_replies (boolean): remove replies from timeline results

    Returns:
        generator[dict]: A generator, dict for each page of results.
    """
    user_id = self._ensure_user_id(user)
    return self._timeline(
        user_id,
        "tweets",
        since_id,
        until_id,
        start_time,
        end_time,
        exclude_retweets,
        exclude_replies,
    )
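
A sketch of collecting a user's timeline without retweets, assuming `t` is a Twarc2 instance (the user ID is a placeholder):

```python
for page in t.timeline(12, exclude_retweets=True):
    for tweet in page.get("data", []):
        print(tweet["created_at"], tweet["text"])
```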

tweet_lookup(self, tweet_ids)

Lookup tweets, taking an iterator of IDs and returning pages of fully expanded tweet objects.

This can be used to rehydrate a collection shared as only tweet IDs. Yields one page of tweets at a time, in blocks of up to 100.

Calls GET /2/tweets

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| tweet_ids | iterable | A list of tweet IDs. | required |

Returns:

| Type | Description |
| --- | --- |
| generator[dict] | A generator, dict for each batch of 100 tweets. |

Source code in twarc/client2.py
def tweet_lookup(self, tweet_ids):
    """
    Lookup tweets, taking an iterator of IDs and returning pages of fully
    expanded tweet objects.

    This can be used to rehydrate a collection shared as only tweet IDs.
    Yields one page of tweets at a time, in blocks of up to 100.

    Calls [GET /2/tweets](https://developer.twitter.com/en/docs/twitter-api/tweets/lookup/api-reference/get-tweets)

    Args:
        tweet_ids (iterable): A list of tweet IDs

    Returns:
        generator[dict]: a generator, dict for each batch of 100 tweets.
    """

    def lookup_batch(tweet_id):

        url = "https://api.twitter.com/2/tweets"

        params = expansions.EVERYTHING.copy()
        params["ids"] = ",".join(tweet_id)

        resp = self.get(url, params=params)
        data = resp.json()

        if self.metadata:
            data = _append_metadata(data, resp.url)

        return data

    tweet_id_batch = []

    for tweet_id in tweet_ids:
        tweet_id_batch.append(str(int(tweet_id)))

        if len(tweet_id_batch) == 100:
            yield lookup_batch(tweet_id_batch)
            tweet_id_batch = []

    if tweet_id_batch:
        yield (lookup_batch(tweet_id_batch))
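
A sketch of rehydrating tweets from a file of IDs, one per line (the filename is a placeholder):

```python
with open("ids.txt") as ids:
    # Skip any blank lines; tweet_lookup batches the rest in 100s.
    for page in t.tweet_lookup(line.strip() for line in ids if line.strip()):
        for tweet in page.get("data", []):
            print(tweet["id"], tweet["text"])
```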

user_lookup(self, users, usernames=False)

Returns fully populated user profiles for the given iterator of user IDs or usernames. By default user_lookup expects user IDs, but if you want to pass in usernames, set usernames=True.

Yields one page of results at a time (in blocks of at most 100 user profiles).

Calls GET /2/users

Parameters:

| Name | Type | Description | Default |
| --- | --- | --- | --- |
| users | iterable | User IDs or usernames to look up. | required |
| usernames | bool | Parse users as usernames, not IDs. | False |

Returns:

| Type | Description |
| --- | --- |
| generator[dict] | A generator, dict for each batch of 100 users. |

Source code in twarc/client2.py
def user_lookup(self, users, usernames=False):
    """
    Returns fully populated user profiles for the given iterator of
    user IDs or usernames. By default user_lookup expects user IDs, but
    if you want to pass in usernames, set usernames=True.

    Yields one page of results at a time (in blocks of at most 100 user
    profiles).

    Calls [GET /2/users](https://developer.twitter.com/en/docs/twitter-api/users/lookup/api-reference/get-users)

    Args:
        users (iterable): User IDs or usernames to lookup.
        usernames (bool): Parse `users` as usernames, not IDs.

    Returns:
        generator[dict]: a generator, dict for each batch of 100 users.
    """

    if usernames:
        url = "https://api.twitter.com/2/users/by"
    else:
        url = "https://api.twitter.com/2/users"

    def lookup_batch(users):
        params = expansions.USER_EVERYTHING.copy()
        if usernames:
            params["usernames"] = ",".join(users)
        else:
            params["ids"] = ",".join(users)

        resp = self.get(url, params=params)
        data = resp.json()

        if self.metadata:
            data = _append_metadata(data, resp.url)

        return data

    batch = []
    for item in users:
        batch.append(str(item).strip())
        if len(batch) == 100:
            yield lookup_batch(batch)
            batch = []

    if batch:
        yield (lookup_batch(batch))
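
A sketch of looking up profiles by username rather than ID:

```python
for page in t.user_lookup(["jack", "twitterdev"], usernames=True):
    for profile in page.get("data", []):
        print(profile["id"], profile["username"])
```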
