twarc.Client2
Support for the Twitter v2 API.
Twarc2
A client for the Twitter v2 API.
__init__(self, consumer_key=None, consumer_secret=None, access_token=None, access_token_secret=None, bearer_token=None, connection_errors=0, metadata=True)
special
Instantiate a Twarc2 instance to talk to the Twitter V2+ API.
The client can use either App or User authentication, but only one at a time. Whether app auth or user auth is used depends on which credentials are provided on initialisation:
- If a `bearer_token` is passed, app auth is always used.
- If a `consumer_key` and `consumer_secret` are passed without an `access_token` and `access_token_secret`, app auth is used.
- If `consumer_key`, `consumer_secret`, `access_token` and `access_token_secret` are all passed, then user authentication is used instead.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
consumer_key |
str |
The API key. |
None |
consumer_secret |
str |
The API secret. |
None |
access_token |
str |
The Access Token |
None |
access_token_secret |
str |
The Access Token Secret |
None |
bearer_token |
str |
Bearer Token, can be generated from API keys. |
None |
connection_errors |
int |
Number of retries for GETs |
0 |
metadata |
bool |
Append |
True |
Source code in twarc/client2.py
def __init__(
    self,
    consumer_key=None,
    consumer_secret=None,
    access_token=None,
    access_token_secret=None,
    bearer_token=None,
    connection_errors=0,
    metadata=True,
):
    """
    Create a Twarc2 client for the Twitter V2+ API.

    Only one auth mode is active at a time; which one depends on the
    credentials supplied:

    1. A `bearer_token` always selects app auth.
    2. A `consumer_key`/`consumer_secret` pair without an access token
       pair also selects app auth.
    3. All four consumer/access credentials together select user auth.

    Args:
        consumer_key (str): The API key.
        consumer_secret (str): The API secret.
        access_token (str): The Access Token
        access_token_secret (str): The Access Token Secret
        bearer_token (str): Bearer Token, can be generated from API keys.
        connection_errors (int): Number of retries for GETs
        metadata (bool): Append `__twarc` metadata to results.

    Raises:
        ValueError: when neither a bearer token nor a usable key/secret
            combination is supplied.
    """
    self.api_version = "2"
    self.connection_errors = connection_errors
    self.metadata = metadata
    self.bearer_token = None

    if bearer_token:
        # A bearer token wins outright: app auth, ignore any other keys.
        self.bearer_token = bearer_token
        self.auth_type = "application"
    elif consumer_key and consumer_secret:
        # Consumer credentials are needed by both remaining modes.
        self.consumer_key = consumer_key
        self.consumer_secret = consumer_secret
        if access_token and access_token_secret:
            self.access_token = access_token
            self.access_token_secret = access_token_secret
            self.auth_type = "user"
        else:
            self.auth_type = "application"
    else:
        raise ValueError(
            "Must pass either a bearer_token or consumer/access_token keys and secrets"
        )

    self.client = None
    self.last_response = None
    self.connect()
add_stream_rules(self, rules)
Adds new rules to the filter stream.
Calls POST /2/tweets/search/stream/rules
Parameters:
Name | Type | Description | Default |
---|---|---|---|
rules |
list[dict] |
A list of rules to add. |
required |
Returns:
Type | Description |
---|---|
dict |
JSON Response from Twitter API. |
Source code in twarc/client2.py
@requires_app_auth
def add_stream_rules(self, rules):
    """
    Register additional rules on the filter stream.

    Calls [POST /2/tweets/search/stream/rules](https://developer.twitter.com/en/docs/twitter-api/tweets/filtered-stream/api-reference/post-tweets-search-stream-rules)

    Args:
        rules (list[dict]): A list of rules to add.

    Returns:
        dict: JSON Response from Twitter API.
    """
    rules_url = "https://api.twitter.com/2/tweets/search/stream/rules"
    payload = {"add": rules}
    return self.post(rules_url, payload).json()
compliance_job_create(self, job_type, job_name, resumable=False)
Creates a new compliance job.
Calls POST /2/compliance/jobs
Parameters:
Name | Type | Description | Default |
---|---|---|---|
job_type |
str |
The type of job to create. Either 'tweets' or 'users'. |
required |
job_name |
str |
Optional name for the job. |
required |
resumable |
bool |
Whether or not the job upload is resumable. |
False |
Source code in twarc/client2.py
@requires_app_auth
def compliance_job_create(self, job_type, job_name, resumable=False):
    """
    Creates a new compliance job.

    Calls [POST /2/compliance/jobs](https://developer.twitter.com/en/docs/twitter-api/compliance/batch-compliance/api-reference/post-compliance-jobs)

    Args:
        job_type (str): The type of job to create. Either 'tweets' or 'users'.
        job_name (str): Optional name for the job.
        resumable (bool): Whether or not the job upload is resumable.

    Returns:
        dict: JSON Response from Twitter API describing the new job.

    Raises:
        ValueError: on a non-200 response, or when the response body has
            no "data" key.
    """
    # "name" is only sent when the caller actually provided one.
    payload = {"type": job_type, "resumable": resumable}
    if job_name:
        payload["name"] = job_name
    result = self.client.post(
        "https://api.twitter.com/2/compliance/jobs", json=payload
    )
    if result.status_code == 200:
        result = result.json()
    else:
        raise ValueError(f"Error from API, response: {result.status_code}")
    if "data" in result:
        return result
    else:
        raise ValueError(f"Unknown response from twitter: {result}")
compliance_job_get(self, job_id)
Returns a compliance job.
Calls GET /2/compliance/jobs/{job_id}
Parameters:
Name | Type | Description | Default |
---|---|---|---|
job_id |
int |
The ID of the compliance job. |
required |
Returns:
Type | Description |
---|---|
dict |
A compliance job. |
Source code in twarc/client2.py
@requires_app_auth
def compliance_job_get(self, job_id):
    """
    Look up a single compliance job by its id.

    Calls [GET /2/compliance/jobs/{job_id}](https://developer.twitter.com/en/docs/twitter-api/compliance/batch-compliance/api-reference/get-compliance-jobs-id)

    Args:
        job_id (int): The ID of the compliance job.

    Returns:
        dict: A compliance job.

    Raises:
        ValueError: on a non-200 response, or when the response body has
            no "data" key.
    """
    response = self.client.get(
        f"https://api.twitter.com/2/compliance/jobs/{job_id}"
    )
    # Fail fast on HTTP errors before trying to parse a body.
    if response.status_code != 200:
        raise ValueError(f"Error from API, response: {response.status_code}")
    job = response.json()
    if "data" not in job:
        raise ValueError(f"Unknown response from twitter: {job}")
    return job
compliance_job_list(self, job_type, status)
Returns list of compliance jobs.
Calls GET /2/compliance/jobs
Parameters:
Name | Type | Description | Default |
---|---|---|---|
job_type |
str |
Filter by job type - either tweets or users. |
required |
status |
str |
Filter by job status. Only one of 'created', 'in_progress', 'complete', 'failed' can be specified. If not set, returns all. |
required |
Returns:
Type | Description |
---|---|
list[dict] |
A list of jobs. |
Source code in twarc/client2.py
@requires_app_auth
def compliance_job_list(self, job_type, status):
    """
    List compliance jobs, optionally filtered by type and status.

    Calls [GET /2/compliance/jobs](https://developer.twitter.com/en/docs/twitter-api/compliance/batch-compliance/api-reference/get-compliance-jobs)

    Args:
        job_type (str): Filter by job type - either tweets or users.
        status (str): Filter by job status. Only one of 'created', 'in_progress', 'complete', 'failed' can be specified. If not set, returns all.

    Returns:
        list[dict]: A list of jobs.

    Raises:
        ValueError: when a non-empty response lacks a "data" key.
    """
    # Only forward filters the caller actually set.
    filters = {"type": job_type, "status": status}
    params = {name: value for name, value in filters.items() if value}
    result = self.client.get(
        "https://api.twitter.com/2/compliance/jobs", params=params
    ).json()
    # An empty body is a valid "no jobs" answer.
    if "data" in result or not result:
        return result
    raise ValueError(f"Unknown response from twitter: {result}")
connect(self)
Sets up the HTTP session to talk to Twitter. If one is active it is closed and another one is opened.
Source code in twarc/client2.py
def connect(self):
    """
    Sets up the HTTP session to talk to Twitter. If one is active it is
    closed and another one is opened.
    """
    # Close any live response/session first so reconnecting does not
    # leak the previous connection.
    if self.last_response:
        self.last_response.close()
    if self.client:
        self.client.close()
    if self.auth_type == "application" and self.bearer_token:
        # App auth with a ready-made bearer token: a plain requests
        # session carrying a static Authorization header.
        log.info("creating HTTP session headers for app auth.")
        auth = f"Bearer {self.bearer_token}"
        log.debug("authorization: %s", auth)
        self.client = requests.Session()
        self.client.headers.update({"Authorization": auth})
    elif self.auth_type == "application":
        # App auth from consumer key/secret: obtain a token through the
        # OAuth2 client-credentials flow (network call on connect).
        log.info("creating app auth client via OAuth2")
        log.debug("client_id: %s", self.consumer_key)
        log.debug("client_secret: %s", self.consumer_secret)
        client = BackendApplicationClient(client_id=self.consumer_key)
        self.client = OAuth2Session(client=client)
        self.client.fetch_token(
            token_url="https://api.twitter.com/oauth2/token",
            client_id=self.consumer_key,
            client_secret=self.consumer_secret,
        )
    else:
        # User auth: each request is signed with OAuth 1.0a user
        # credentials. NOTE: secrets are written to the debug log.
        log.info("creating user auth client")
        log.debug("client_id: %s", self.consumer_key)
        log.debug("client_secret: %s", self.consumer_secret)
        log.debug("resource_owner_key: %s", self.access_token)
        log.debug("resource_owner_secret: %s", self.access_token_secret)
        self.client = OAuth1Session(
            client_key=self.consumer_key,
            client_secret=self.consumer_secret,
            resource_owner_key=self.access_token,
            resource_owner_secret=self.access_token_secret,
        )
counts_all(self, query, since_id=None, until_id=None, start_time=None, end_time=None, granularity='hour')
Retrieve counts for the given query in the full archive,
using the /counts/all
endpoint (Requires Academic Access).
Calls GET /2/tweets/counts/all
Parameters:
Name | Type | Description | Default |
---|---|---|---|
query |
str |
The query string to be passed directly to the Twitter API. |
required |
since_id |
int |
Return all tweets since this tweet_id. |
None |
until_id |
int |
Return all tweets up to this tweet_id. |
None |
start_time |
datetime |
Return all tweets after this time (UTC datetime). |
None |
end_time |
datetime |
Return all tweets before this time (UTC datetime). |
None |
granularity |
str |
Count aggregation level: |
'hour' |
Returns:
Type | Description |
---|---|
generator[dict] |
a generator, dict for each paginated response. |
Source code in twarc/client2.py
@requires_app_auth
def counts_all(
    self,
    query,
    since_id=None,
    until_id=None,
    start_time=None,
    end_time=None,
    granularity="hour",
):
    """
    Retrieve counts for the given query in the full archive,
    using the `/counts/all` endpoint (Requires Academic Access).
    Calls [GET /2/tweets/counts/all](https://developer.twitter.com/en/docs/twitter-api/tweets/counts/api-reference/get-tweets-counts-all)
    Args:
        query (str):
            The query string to be passed directly to the Twitter API.
        since_id (int):
            Return all tweets since this tweet_id.
        until_id (int):
            Return all tweets up to this tweet_id.
        start_time (datetime):
            Return all tweets after this time (UTC datetime).
        end_time (datetime):
            Return all tweets before this time (UTC datetime).
        granularity (str):
            Count aggregation level: `day`, `hour`, `minute`.
            Default is `hour`.
    Returns:
        generator[dict]: a generator, dict for each paginated response.
    """
    url = "https://api.twitter.com/2/tweets/counts/all"
    # sleep_between paces requests on the full-archive endpoint —
    # presumably to respect its stricter rate limit (confirm).
    return self._search(
        url,
        query,
        since_id,
        until_id,
        start_time,
        end_time,
        None,
        granularity,
        sleep_between=1.05,
    )
counts_recent(self, query, since_id=None, until_id=None, start_time=None, end_time=None, granularity='hour')
Retrieve counts for the given query in the last seven days,
using the /counts/recent
endpoint.
Calls GET /2/tweets/counts/recent
Parameters:
Name | Type | Description | Default |
---|---|---|---|
query |
str |
The query string to be passed directly to the Twitter API. |
required |
since_id |
int |
Return all tweets since this tweet_id. |
None |
until_id |
int |
Return all tweets up to this tweet_id. |
None |
start_time |
datetime |
Return all tweets after this time (UTC datetime). |
None |
end_time |
datetime |
Return all tweets before this time (UTC datetime). |
None |
granularity |
str |
Count aggregation level: |
'hour' |
Returns:
Type | Description |
---|---|
generator[dict] |
a generator, dict for each paginated response. |
Source code in twarc/client2.py
@requires_app_auth
def counts_recent(
    self,
    query,
    since_id=None,
    until_id=None,
    start_time=None,
    end_time=None,
    granularity="hour",
):
    """
    Retrieve counts for the given query in the last seven days,
    using the `/counts/recent` endpoint.
    Calls [GET /2/tweets/counts/recent](https://developer.twitter.com/en/docs/twitter-api/tweets/counts/api-reference/get-tweets-counts-recent)
    Args:
        query (str):
            The query string to be passed directly to the Twitter API.
        since_id (int):
            Return all tweets since this tweet_id.
        until_id (int):
            Return all tweets up to this tweet_id.
        start_time (datetime):
            Return all tweets after this time (UTC datetime).
        end_time (datetime):
            Return all tweets before this time (UTC datetime).
        granularity (str):
            Count aggregation level: `day`, `hour`, `minute`.
            Default is `hour`.
    Returns:
        generator[dict]: a generator, dict for each paginated response.
    """
    url = "https://api.twitter.com/2/tweets/counts/recent"
    # max_results is None: counts endpoints return buckets, not tweets.
    return self._search(
        url, query, since_id, until_id, start_time, end_time, None, granularity
    )
delete_stream_rule_ids(self, rule_ids)
Deletes rules from the filter stream.
Calls POST /2/tweets/search/stream/rules
Parameters:
Name | Type | Description | Default |
---|---|---|---|
rule_ids |
list[int] |
A list of rule ids to delete. |
required |
Returns:
Type | Description |
---|---|
dict |
JSON Response from Twitter API. |
Source code in twarc/client2.py
@requires_app_auth
def delete_stream_rule_ids(self, rule_ids):
    """
    Remove the given rules from the filter stream.

    Calls [POST /2/tweets/search/stream/rules](https://developer.twitter.com/en/docs/twitter-api/tweets/filtered-stream/api-reference/post-tweets-search-stream-rules)

    Args:
        rule_ids (list[int]): A list of rule ids to delete.

    Returns:
        dict: JSON Response from Twitter API.
    """
    payload = {"delete": {"ids": rule_ids}}
    return self.post(
        "https://api.twitter.com/2/tweets/search/stream/rules", payload
    ).json()
followers(self, user, user_id=None)
Retrieve the user profiles of accounts following the given user.
Calls GET /2/users/:id/followers
Parameters:
Name | Type | Description | Default |
---|---|---|---|
user |
int |
ID of the user. |
required |
Returns:
Type | Description |
---|---|
generator[dict] |
A generator, dict for each page of results. |
Source code in twarc/client2.py
def followers(self, user, user_id=None):
    """
    Retrieve the user profiles of accounts following the given user.
    Calls [GET /2/users/:id/followers](https://developer.twitter.com/en/docs/twitter-api/users/follows/api-reference/get-users-id-followers)
    Args:
        user (int): ID of the user.
        user_id (int): Optional pre-resolved numeric user id; when given,
            the `_ensure_user_id` lookup on `user` is skipped.
    Returns:
        generator[dict]: A generator, dict for each page of results.
    """
    user_id = self._ensure_user_id(user) if not user_id else user_id
    params = expansions.USER_EVERYTHING.copy()
    # Request the maximum page size to minimise the number of API calls.
    params["max_results"] = 1000
    url = f"https://api.twitter.com/2/users/{user_id}/followers"
    return self.get_paginated(url, params=params)
following(self, user, user_id=None)
Retrieve the user profiles of accounts followed by the given user.
Calls GET /2/users/:id/following
Parameters:
Name | Type | Description | Default |
---|---|---|---|
user |
int |
ID of the user. |
required |
Returns:
Type | Description |
---|---|
generator[dict] |
A generator, dict for each page of results. |
Source code in twarc/client2.py
def following(self, user, user_id=None):
    """
    Retrieve the user profiles of accounts followed by the given user.
    Calls [GET /2/users/:id/following](https://developer.twitter.com/en/docs/twitter-api/users/follows/api-reference/get-users-id-following)
    Args:
        user (int): ID of the user.
        user_id (int): Optional pre-resolved numeric user id; when given,
            the `_ensure_user_id` lookup on `user` is skipped.
    Returns:
        generator[dict]: A generator, dict for each page of results.
    """
    user_id = self._ensure_user_id(user) if not user_id else user_id
    params = expansions.USER_EVERYTHING.copy()
    # Request the maximum page size to minimise the number of API calls.
    params["max_results"] = 1000
    url = f"https://api.twitter.com/2/users/{user_id}/following"
    return self.get_paginated(url, params=params)
get(self, *args, **kwargs)
Make a GET request to a specified URL.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
*args |
Variable length argument list. |
() |
|
**kwargs |
Arbitrary keyword arguments. |
{} |
Returns:
Type | Description |
---|---|
requests.Response |
Response from Twitter API. |
Source code in twarc/client2.py
@catch_request_exceptions
@rate_limit
def get(self, *args, **kwargs):
    """
    Issue a GET request against the given URL.

    Args:
        *args: Variable length argument list.
        **kwargs: Arbitrary keyword arguments.

    Returns:
        requests.Response: Response from Twitter API.
    """
    # Lazily (re)establish the session before the first request.
    if not self.client:
        self.connect()
    log.info("getting %s %s", args, kwargs)
    response = self.client.get(*args, timeout=(3.05, 31), **kwargs)
    self.last_response = response
    return response
get_paginated(self, *args, **kwargs)
A wrapper around the get
method that handles Twitter token based
pagination.
Yields one page (one API response) at a time.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
*args |
Variable length argument list. |
() |
|
**kwargs |
Arbitrary keyword arguments. |
{} |
Returns:
Type | Description |
---|---|
generator[dict] |
A generator, dict for each page of results. |
Source code in twarc/client2.py
def get_paginated(self, *args, **kwargs):
    """
    A wrapper around the `get` method that handles Twitter token based
    pagination.
    Yields one page (one API response) at a time.
    Args:
        *args: Variable length argument list; the first positional
            argument is the request URL.
        **kwargs: Arbitrary keyword arguments.
    Returns:
        generator[dict]: A generator, dict for each page of results.
    """
    resp = self.get(*args, **kwargs)
    page = resp.json()
    url = args[0]
    if self.metadata:
        page = _append_metadata(page, resp.url)
    # Always yield the first page, even if there are no more.
    yield page
    endings = ["mentions", "tweets", "following", "followers"]
    # The search endpoints only take a next_token, but the timeline
    # endpoints take a pagination_token instead - this is a bit of a hack,
    # but check the URL ending to see which we should use.
    if any(url.endswith(end) for end in endings):
        token_param = "pagination_token"
    else:
        token_param = "next_token"
    # Keep following the token returned in page["meta"] until Twitter
    # stops sending one; the token goes back out as a query parameter.
    while "meta" in page and "next_token" in page["meta"]:
        if "params" in kwargs:
            kwargs["params"][token_param] = page["meta"]["next_token"]
        else:
            kwargs["params"] = {token_param: page["meta"]["next_token"]}
        resp = self.get(*args, **kwargs)
        page = resp.json()
        if self.metadata:
            page = _append_metadata(page, resp.url)
        yield page
get_stream_rules(self)
Returns a list of rules for the filter stream.
Calls GET /2/tweets/search/stream/rules
Returns:
Type | Description |
---|---|
dict |
JSON Response from Twitter API with a list of defined rules. |
Source code in twarc/client2.py
@requires_app_auth
def get_stream_rules(self):
    """
    Fetch the rules currently defined on the filter stream.

    Calls [GET /2/tweets/search/stream/rules](https://developer.twitter.com/en/docs/twitter-api/tweets/filtered-stream/api-reference/get-tweets-search-stream-rules)

    Returns:
        dict: JSON Response from Twitter API with a list of defined rules.
    """
    return self.get(
        "https://api.twitter.com/2/tweets/search/stream/rules"
    ).json()
mentions(self, user, since_id=None, until_id=None, start_time=None, end_time=None, exclude_retweets=False, exclude_replies=False)
Retrieve up to the 800 most recent tweets mentioning the given user.
Calls GET /2/users/:id/mentions
Parameters:
Name | Type | Description | Default |
---|---|---|---|
user |
int |
ID of the user. |
required |
since_id |
int |
results with a Tweet ID greater than (newer) than specified |
None |
until_id |
int |
results with a Tweet ID less than (older) than specified |
None |
start_time |
datetime |
oldest UTC timestamp from which the Tweets will be provided |
None |
end_time |
datetime |
newest UTC timestamp from which the Tweets will be provided |
None |
exclude_retweets |
boolean |
remove retweets from timeline results |
False |
exclude_replies |
boolean |
remove replies from timeline results |
False |
Returns:
Type | Description |
---|---|
generator[dict] |
A generator, dict for each page of results. |
Source code in twarc/client2.py
def mentions(
    self,
    user,
    since_id=None,
    until_id=None,
    start_time=None,
    end_time=None,
    exclude_retweets=False,
    exclude_replies=False,
):
    """
    Fetch up to the 800 most recent tweets that mention the given user.

    Calls [GET /2/users/:id/mentions](https://developer.twitter.com/en/docs/twitter-api/tweets/timelines/api-reference/get-users-id-mentions)

    Args:
        user (int): ID of the user.
        since_id (int): results with a Tweet ID greater than (newer) than specified
        until_id (int): results with a Tweet ID less than (older) than specified
        start_time (datetime): oldest UTC timestamp from which the Tweets will be provided
        end_time (datetime): newest UTC timestamp from which the Tweets will be provided
        exclude_retweets (boolean): remove retweets from timeline results
        exclude_replies (boolean): remove replies from timeline results

    Returns:
        generator[dict]: A generator, dict for each page of results.
    """
    # Resolve the user to a numeric id and defer to the shared timeline
    # implementation, pointed at the "mentions" endpoint.
    return self._timeline(
        self._ensure_user_id(user),
        "mentions",
        since_id,
        until_id,
        start_time,
        end_time,
        exclude_retweets,
        exclude_replies,
    )
post(self, url, json_data)
Make a POST request to the specified URL.
Parameters:
Name | Type | Description | Default |
---|---|---|---|
url |
str |
URL to make a POST request |
required |
json_data |
dict |
JSON data to send. |
required |
Returns:
Type | Description |
---|---|
requests.Response |
Response from Twitter API. |
Source code in twarc/client2.py
@catch_request_exceptions
@rate_limit
def post(self, url, json_data):
    """
    Make a POST request to the specified URL.
    Args:
        url (str): URL to make a POST request
        json_data (dict): JSON data to send.
    Returns:
        requests.Response: Response from Twitter API.
    """
    # Lazily (re)establish the session before the first request.
    if not self.client:
        self.connect()
    return self.client.post(url, json=json_data)
sample(self, event=None, record_keepalive=False)
Returns a sample of all publicly posted tweets.
The sample is based on slices of each second, not truly randomised. The same tweets are returned for all users of this endpoint.
If a threading.Event
is provided and the event is set, the
sample will be interrupted. This can be used for coordination with other
programs.
Calls GET /2/tweets/sample/stream
Parameters:
Name | Type | Description | Default |
---|---|---|---|
event |
threading.Event |
Manages a flag to stop the process. |
None |
record_keepalive |
bool |
whether to output keep-alive events. |
False |
Returns:
Type | Description |
---|---|
generator[dict] |
a generator, dict for each tweet. |
Source code in twarc/client2.py
@catch_request_exceptions
@requires_app_auth
def sample(self, event=None, record_keepalive=False):
    """
    Yield a sample of all publicly posted tweets.

    The sample is taken from slices of each second rather than being
    truly random, and every consumer of this endpoint receives the same
    tweets. Passing an already-set `threading.Event` interrupts the
    sample, which allows coordination with other programs.

    Calls [GET /2/tweets/sample/stream](https://developer.twitter.com/en/docs/twitter-api/tweets/sampled-stream/api-reference/get-tweets-sample-stream)

    Args:
        event (threading.Event): Manages a flag to stop the process.
        record_keepalive (bool): whether to output keep-alive events.

    Returns:
        generator[dict]: a generator, dict for each tweet.
    """
    yield from self._stream(
        "https://api.twitter.com/2/tweets/sample/stream",
        expansions.EVERYTHING.copy(),
        event,
        record_keepalive,
    )
search_all(self, query, since_id=None, until_id=None, start_time=None, end_time=None, max_results=100)
Search Twitter for the given query in the full archive,
using the /search/all
endpoint (Requires Academic Access).
Calls GET /2/tweets/search/all
Parameters:
Name | Type | Description | Default |
---|---|---|---|
query |
str |
The query string to be passed directly to the Twitter API. |
required |
since_id |
int |
Return all tweets since this tweet_id. |
None |
until_id |
int |
Return all tweets up to this tweet_id. |
None |
start_time |
datetime |
Return all tweets after this time (UTC datetime). If none of start_time, since_id, or until_id are specified, this defaults to 2006-3-21 to search the entire history of Twitter. |
None |
end_time |
datetime |
Return all tweets before this time (UTC datetime). |
None |
max_results |
int |
The maximum number of results per request. Max is 500. |
100 |
Returns:
Type | Description |
---|---|
generator[dict] |
a generator, dict for each paginated response. |
Source code in twarc/client2.py
@requires_app_auth
def search_all(
    self,
    query,
    since_id=None,
    until_id=None,
    start_time=None,
    end_time=None,
    max_results=100,  # temp fix for #504
):
    """
    Run a full-archive search for the query via the `/search/all`
    endpoint (Requires Academic Access).

    Calls [GET /2/tweets/search/all](https://developer.twitter.com/en/docs/twitter-api/tweets/search/api-reference/get-tweets-search-all)

    Args:
        query (str):
            The query string to be passed directly to the Twitter API.
        since_id (int):
            Return all tweets since this tweet_id.
        until_id (int):
            Return all tweets up to this tweet_id.
        start_time (datetime):
            Return all tweets after this time (UTC datetime). If none of
            start_time, since_id, or until_id are specified, this
            defaults to 2006-3-21 to search the entire history of Twitter.
        end_time (datetime):
            Return all tweets before this time (UTC datetime).
        max_results (int):
            The maximum number of results per request. Max is 500.

    Returns:
        generator[dict]: a generator, dict for each paginated response.
    """
    # The API defaults to roughly the last month; unless the caller
    # anchored the search with a time or id bound, widen the window to
    # the beginning of Twitter.
    unanchored = start_time is None and since_id is None and until_id is None
    if unanchored:
        start_time = datetime.datetime(2006, 3, 21, tzinfo=datetime.timezone.utc)
    return self._search(
        "https://api.twitter.com/2/tweets/search/all",
        query,
        since_id,
        until_id,
        start_time,
        end_time,
        max_results,
        sleep_between=1.05,
    )
search_recent(self, query, since_id=None, until_id=None, start_time=None, end_time=None, max_results=100)
Search Twitter for the given query in the last seven days,
using the /search/recent
endpoint.
Calls GET /2/tweets/search/recent
Parameters:
Name | Type | Description | Default |
---|---|---|---|
query |
str |
The query string to be passed directly to the Twitter API. |
required |
since_id |
int |
Return all tweets since this tweet_id. |
None |
until_id |
int |
Return all tweets up to this tweet_id. |
None |
start_time |
datetime |
Return all tweets after this time (UTC datetime). |
None |
end_time |
datetime |
Return all tweets before this time (UTC datetime). |
None |
max_results |
int |
The maximum number of results per request. Max is 100. |
100 |
Returns:
Type | Description |
---|---|
generator[dict] |
a generator, dict for each paginated response. |
Source code in twarc/client2.py
def search_recent(
    self,
    query,
    since_id=None,
    until_id=None,
    start_time=None,
    end_time=None,
    max_results=100,
):
    """
    Search the last seven days of tweets for the query, via the
    `/search/recent` endpoint.

    Calls [GET /2/tweets/search/recent](https://developer.twitter.com/en/docs/twitter-api/tweets/search/api-reference/get-tweets-search-recent)

    Args:
        query (str):
            The query string to be passed directly to the Twitter API.
        since_id (int):
            Return all tweets since this tweet_id.
        until_id (int):
            Return all tweets up to this tweet_id.
        start_time (datetime):
            Return all tweets after this time (UTC datetime).
        end_time (datetime):
            Return all tweets before this time (UTC datetime).
        max_results (int):
            The maximum number of results per request. Max is 100.

    Returns:
        generator[dict]: a generator, dict for each paginated response.
    """
    return self._search(
        "https://api.twitter.com/2/tweets/search/recent",
        query,
        since_id,
        until_id,
        start_time,
        end_time,
        max_results,
    )
stream(self, event=None, record_keepalive=False)
Returns a stream of tweets matching the defined rules.
Rules can be added or removed out-of-band, without disconnecting. Tweet results will contain metadata about the rule that matched it.
If event is set with a threading.Event object, the sample stream will be interrupted. This can be used for coordination with other programs.
Calls GET /2/tweets/search/stream
Parameters:
Name | Type | Description | Default |
---|---|---|---|
event |
threading.Event |
Manages a flag to stop the process. |
None |
record_keepalive |
bool |
whether to output keep-alive events. |
False |
Returns:
Type | Description |
---|---|
generator[dict] |
a generator, dict for each tweet. |
Source code in twarc/client2.py
@requires_app_auth
def stream(self, event=None, record_keepalive=False):
    """
    Yield tweets matching the currently defined filter-stream rules.

    Rules can be added or removed out-of-band, without disconnecting,
    and each tweet carries metadata about the rule that matched it.
    Passing an already-set `threading.Event` interrupts the stream,
    which allows coordination with other programs.

    Calls [GET /2/tweets/search/stream](https://developer.twitter.com/en/docs/twitter-api/tweets/filtered-stream/api-reference/get-tweets-search-stream)

    Args:
        event (threading.Event): Manages a flag to stop the process.
        record_keepalive (bool): whether to output keep-alive events.

    Returns:
        generator[dict]: a generator, dict for each tweet.
    """
    yield from self._stream(
        "https://api.twitter.com/2/tweets/search/stream",
        expansions.EVERYTHING.copy(),
        event,
        record_keepalive,
    )
timeline(self, user, since_id=None, until_id=None, start_time=None, end_time=None, exclude_retweets=False, exclude_replies=False)
Retrieve up to the 3200 most recent tweets made by the given user.
Calls GET /2/users/:id/tweets
Parameters:
Name | Type | Description | Default |
---|---|---|---|
user |
int |
ID of the user. |
required |
since_id |
int |
results with a Tweet ID greater than (newer) than specified |
None |
until_id |
int |
results with a Tweet ID less than (older) than specified |
None |
start_time |
datetime |
oldest UTC timestamp from which the Tweets will be provided |
None |
end_time |
datetime |
newest UTC timestamp from which the Tweets will be provided |
None |
exclude_retweets |
boolean |
remove retweets from timeline results |
False |
exclude_replies |
boolean |
remove replies from timeline results |
False |
Returns:
Type | Description |
---|---|
generator[dict] |
A generator, dict for each page of results. |
Source code in twarc/client2.py
def timeline(
    self,
    user,
    since_id=None,
    until_id=None,
    start_time=None,
    end_time=None,
    exclude_retweets=False,
    exclude_replies=False,
):
    """
    Fetch up to the 3200 most recent tweets posted by the given user.

    Calls [GET /2/users/:id/tweets](https://developer.twitter.com/en/docs/twitter-api/tweets/timelines/api-reference/get-users-id-tweets)

    Args:
        user (int): ID of the user.
        since_id (int): results with a Tweet ID greater than (newer) than specified
        until_id (int): results with a Tweet ID less than (older) than specified
        start_time (datetime): oldest UTC timestamp from which the Tweets will be provided
        end_time (datetime): newest UTC timestamp from which the Tweets will be provided
        exclude_retweets (boolean): remove retweets from timeline results
        exclude_replies (boolean): remove replies from timeline results

    Returns:
        generator[dict]: A generator, dict for each page of results.
    """
    # Resolve the user to a numeric id and defer to the shared timeline
    # implementation, pointed at the "tweets" endpoint.
    return self._timeline(
        self._ensure_user_id(user),
        "tweets",
        since_id,
        until_id,
        start_time,
        end_time,
        exclude_retweets,
        exclude_replies,
    )
tweet_lookup(self, tweet_ids)
Lookup tweets, taking an iterator of IDs and returning pages of fully expanded tweet objects.
This can be used to rehydrate a collection shared as only tweet IDs. Yields one page of tweets at a time, in blocks of up to 100.
Calls GET /2/tweets
Parameters:
Name | Type | Description | Default |
---|---|---|---|
tweet_ids |
iterable |
A list of tweet IDs |
required |
Returns:
Type | Description |
---|---|
generator[dict] |
a generator, dict for each batch of 100 tweets. |
Source code in twarc/client2.py
def tweet_lookup(self, tweet_ids):
    """
    Hydrate an iterable of tweet IDs into pages of fully expanded tweet
    objects, e.g. to rehydrate a collection shared only as IDs. Yields
    one page at a time, in blocks of up to 100 tweets.

    Calls [GET /2/tweets](https://developer.twitter.com/en/docs/twitter-api/tweets/lookup/api-reference/get-tweets)

    Args:
        tweet_ids (iterable): A list of tweet IDs

    Returns:
        generator[dict]: a generator, dict for each batch of 100 tweets.
    """
    def fetch(batch):
        # One GET per batch of ids, joined into a comma-separated list.
        params = expansions.EVERYTHING.copy()
        params["ids"] = ",".join(batch)
        resp = self.get("https://api.twitter.com/2/tweets", params=params)
        payload = resp.json()
        if self.metadata:
            payload = _append_metadata(payload, resp.url)
        return payload

    pending = []
    for raw_id in tweet_ids:
        # int() round-trip validates the id and strips whitespace/zeros.
        pending.append(str(int(raw_id)))
        if len(pending) == 100:
            yield fetch(pending)
            pending = []
    if pending:
        yield fetch(pending)
user_lookup(self, users, usernames=False)
Returns fully populated user profiles for the given iterator of user_id or usernames. By default user_lookup expects user ids but if you want to pass in usernames set usernames = True.
Yields one page of results at a time (in blocks of at most 100 user profiles).
Calls GET /2/users
Parameters:
Name | Type | Description | Default |
---|---|---|---|
users |
iterable |
User IDs or usernames to lookup. |
required |
usernames |
bool |
Parse |
False |
Returns:
Type | Description |
---|---|
generator[dict] |
a generator, dict for each batch of 100 users. |
Source code in twarc/client2.py
def user_lookup(self, users, usernames=False):
    """
    Hydrate fully populated user profiles for an iterable of user ids
    (or usernames when `usernames=True`). Yields one page of results at
    a time, in blocks of at most 100 profiles.

    Calls [GET /2/users](https://developer.twitter.com/en/docs/twitter-api/users/lookup/api-reference/get-users)

    Args:
        users (iterable): User IDs or usernames to lookup.
        usernames (bool): Parse `users` as usernames, not IDs.

    Returns:
        generator[dict]: a generator, dict for each batch of 100 users.
    """
    # Usernames and ids go to different endpoints and query parameters.
    if usernames:
        endpoint = "https://api.twitter.com/2/users/by"
        id_param = "usernames"
    else:
        endpoint = "https://api.twitter.com/2/users"
        id_param = "ids"

    def fetch(batch):
        params = expansions.USER_EVERYTHING.copy()
        params[id_param] = ",".join(batch)
        resp = self.get(endpoint, params=params)
        payload = resp.json()
        if self.metadata:
            payload = _append_metadata(payload, resp.url)
        return payload

    pending = []
    for item in users:
        pending.append(str(item).strip())
        if len(pending) == 100:
            yield fetch(pending)
            pending = []
    if pending:
        yield fetch(pending)
handler: python