Create variables
Contents
Create variables
derive - Creating new variables
Let’s say we’ve run a survey asking people about what sports they do. We want to create a new variable that designates people who do sports/exercise regularly and whose main fitness activity is aerobics, yoga or pilates.
Exploring the relevant codes
To do this, we look at the metadata for questions q1 and q2b and find the codes for aerobics, yoga, pilates and regular sporties. These are 4, 5 and 6 from q1 and 1 from q2b.
# what is your main sports activity?
dataset.meta(variable='q1')
---------------------------------------------------------------------------
KeyboardInterrupt Traceback (most recent call last)
Cell In[2], line 2
1 # what is your main sports activity?
----> 2 dataset.meta(variable='q1')
File ~/work/tally-client/tally-client/tally/dataset.py:753, in DataSet.meta(self, **kwargs)
743 def meta(self, **kwargs):
744 """Shows the meta-data for a variable
745
746 Parameters
(...)
751 Name of multiple variables to show meta data for
752 """
--> 753 return self._call_tally('meta', **kwargs)
File ~/work/tally-client/tally-client/tally/decorators.py:36, in add_data.<locals>.wrapper(*aargs, **kkwargs)
29 elif aargs[0].dataset_type == 'sav':
30 kkwargs['data_params'] = {
31 'binary_data': {'spss': (aargs[0].filename,
32 io.BytesIO(aargs[0].sav_data),
33 'application/x-spss-sav')
34 }
35 }
---> 36 return func(*aargs, **kkwargs)
File ~/work/tally-client/tally-client/tally/decorators.py:43, in format_response.<locals>.wrapper(*aargs, **kkwargs)
40 @functools.wraps(func)
41 def wrapper(*aargs, **kkwargs):
42 format = kkwargs.pop('format', None)
---> 43 result = func(*aargs, **kkwargs)
44 if result is None:
45 return
File ~/work/tally-client/tally-client/tally/dataset.py:50, in DataSet._call_tally(self, api_endpoint, *args, **kwargs)
47 if self.use_futures:
48 # Returns the uid of the operation.
49 return self.call_use_future(api_endpoint, kwargs, files)
---> 50 response = self.tally.post_request('tally', api_endpoint, payload, files)
51 if response.status_code == 404:
52 return self._method_not_found_response(api_endpoint)
File ~/work/tally-client/tally-client/tally/decorators.py:107, in verify_token.<locals>.wrapper(*aargs, **kkwargs)
105 @functools.wraps(func)
106 def wrapper(*aargs, **kkwargs):
--> 107 result = func(*aargs, **kkwargs)
108 if result.status_code == 401:
109 result_dict = json.loads(result.content)
File ~/work/tally-client/tally-client/tally/tally.py:117, in Tally.post_request(self, resource, action, data, files)
115 result = requests.post(request_path, headers=headers, data=data, files=files)
116 else:
--> 117 result = requests.post(request_path, headers=self._get_headers(), data=json.dumps(data))
118 return result
File /opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages/requests/api.py:115, in post(url, data, json, **kwargs)
103 def post(url, data=None, json=None, **kwargs):
104 r"""Sends a POST request.
105
106 :param url: URL for the new :class:`Request` object.
(...)
112 :rtype: requests.Response
113 """
--> 115 return request("post", url, data=data, json=json, **kwargs)
File /opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages/requests/api.py:59, in request(method, url, **kwargs)
55 # By using the 'with' statement we are sure the session is closed, thus we
56 # avoid leaving sockets open which can trigger a ResourceWarning in some
57 # cases, and look like a memory leak in others.
58 with sessions.Session() as session:
---> 59 return session.request(method=method, url=url, **kwargs)
File /opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages/requests/sessions.py:589, in Session.request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
584 send_kwargs = {
585 "timeout": timeout,
586 "allow_redirects": allow_redirects,
587 }
588 send_kwargs.update(settings)
--> 589 resp = self.send(prep, **send_kwargs)
591 return resp
File /opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages/requests/sessions.py:703, in Session.send(self, request, **kwargs)
700 start = preferred_clock()
702 # Send the request
--> 703 r = adapter.send(request, **kwargs)
705 # Total elapsed time of the request (approximately)
706 elapsed = preferred_clock() - start
File /opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages/requests/adapters.py:486, in HTTPAdapter.send(self, request, stream, timeout, verify, cert, proxies)
483 timeout = TimeoutSauce(connect=timeout, read=timeout)
485 try:
--> 486 resp = conn.urlopen(
487 method=request.method,
488 url=url,
489 body=request.body,
490 headers=request.headers,
491 redirect=False,
492 assert_same_host=False,
493 preload_content=False,
494 decode_content=False,
495 retries=self.max_retries,
496 timeout=timeout,
497 chunked=chunked,
498 )
500 except (ProtocolError, OSError) as err:
501 raise ConnectionError(err, request=request)
File /opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages/urllib3/connectionpool.py:790, in HTTPConnectionPool.urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, preload_content, decode_content, **response_kw)
787 response_conn = conn if not release_conn else None
789 # Make the request on the HTTPConnection object
--> 790 response = self._make_request(
791 conn,
792 method,
793 url,
794 timeout=timeout_obj,
795 body=body,
796 headers=headers,
797 chunked=chunked,
798 retries=retries,
799 response_conn=response_conn,
800 preload_content=preload_content,
801 decode_content=decode_content,
802 **response_kw,
803 )
805 # Everything went great!
806 clean_exit = True
File /opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages/urllib3/connectionpool.py:536, in HTTPConnectionPool._make_request(self, conn, method, url, body, headers, retries, timeout, chunked, response_conn, preload_content, decode_content, enforce_content_length)
534 # Receive the response from the server
535 try:
--> 536 response = conn.getresponse()
537 except (BaseSSLError, OSError) as e:
538 self._raise_timeout(err=e, url=url, timeout_value=read_timeout)
File /opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages/urllib3/connection.py:454, in HTTPConnection.getresponse(self)
451 from .response import HTTPResponse
453 # Get the response from http.client.HTTPConnection
--> 454 httplib_response = super().getresponse()
456 try:
457 assert_header_parsing(httplib_response.msg)
File /opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/http/client.py:1377, in HTTPConnection.getresponse(self)
1375 try:
1376 try:
-> 1377 response.begin()
1378 except ConnectionError:
1379 self.close()
File /opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/http/client.py:320, in HTTPResponse.begin(self)
318 # read until we get a non-100 response
319 while True:
--> 320 version, status, reason = self._read_status()
321 if status != CONTINUE:
322 break
File /opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/http/client.py:281, in HTTPResponse._read_status(self)
280 def _read_status(self):
--> 281 line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
282 if len(line) > _MAXLINE:
283 raise LineTooLong("status line")
File /opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/socket.py:704, in SocketIO.readinto(self, b)
702 while True:
703 try:
--> 704 return self._sock.recv_into(b)
705 except timeout:
706 self._timeout_occurred = True
File /opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/ssl.py:1242, in SSLSocket.recv_into(self, buffer, nbytes, flags)
1238 if flags != 0:
1239 raise ValueError(
1240 "non-zero flags not allowed in calls to recv_into() on %s" %
1241 self.__class__)
-> 1242 return self.read(nbytes, buffer)
1243 else:
1244 return super().recv_into(buffer, nbytes, flags)
File /opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/ssl.py:1100, in SSLSocket.read(self, len, buffer)
1098 try:
1099 if buffer is not None:
-> 1100 return self._sslobj.read(len, buffer)
1101 else:
1102 return self._sslobj.read(len)
KeyboardInterrupt:
# how regularly do you exercise?
dataset.meta(variable='q2b')
Building the logic
We now build the logic for our new variable. Code 1 will represent “regular sporties, mainly into yoga, aerobics or pilates” and code 2 will include non sporties and those whose main sport is not one of the above. We use the Tally logical system as documented in the API.
cond_map = [
(
1,
"Regular sporties, mainly into yoga, aerobics or pilates",
{ "$intersection": [{"q1":[4, 5, 6]}, {"q2b":[1]}] }
),
(
2,
"Non sporties, main activity not yoga, airobics, pilates",
{"$union":
[
{"$not_any":{"q2b":[1]}},
{"$not_any":{"q1":[4,5,6]}}
]
}
)
]
Creating the derived variable
Using our new logic, we create the variable with the derive method (for more details, see the API on derive).
dataset.derive(
name='active_aer_yoga_pilates',
label='Active sporties into aerobics, yoga or pilates',
cond_maps=cond_map,
qtype="single"
)
Once we have created the new variable, we can sanity check it.
dataset.crosstab(x='active_aer_yoga_pilates', ci=['counts', 'c%'])