Create variables#

derive - Creating new variables#

Let’s say we’ve run a survey asking people which sports they do. We want to create a new variable that identifies people who do sports or exercise regularly and whose main fitness activity is aerobics, yoga or pilates.

Exploring the relevant codes#

To do this, we look at the metadata for questions q1 and q2b and find the codes for aerobics, yoga and pilates, and the code for regular sporties. These are codes 4, 5 and 6 in q1 and code 1 in q2b.

# what is your main sports activity?
dataset.meta(variable='q1')
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
Cell In[2], line 2
      1 # what is your main sports activity?
----> 2 dataset.meta(variable='q1')

File ~/work/tally-client/tally-client/tally/dataset.py:753, in DataSet.meta(self, **kwargs)
    743 def meta(self, **kwargs):
    744     """Shows the meta-data for a variable
    745 
    746     Parameters
   (...)
    751         Name of multiple variables to show meta data for
    752     """
--> 753     return self._call_tally('meta', **kwargs)

File ~/work/tally-client/tally-client/tally/decorators.py:36, in add_data.<locals>.wrapper(*aargs, **kkwargs)
     29 elif aargs[0].dataset_type == 'sav':
     30     kkwargs['data_params'] = {
     31         'binary_data': {'spss': (aargs[0].filename, 
     32                           io.BytesIO(aargs[0].sav_data), 
     33                           'application/x-spss-sav')
     34                 }
     35     }
---> 36 return func(*aargs, **kkwargs)

File ~/work/tally-client/tally-client/tally/decorators.py:43, in format_response.<locals>.wrapper(*aargs, **kkwargs)
     40 @functools.wraps(func)
     41 def wrapper(*aargs, **kkwargs):
     42     format = kkwargs.pop('format', None)
---> 43     result = func(*aargs, **kkwargs)
     44     if result is None:
     45         return

File ~/work/tally-client/tally-client/tally/dataset.py:50, in DataSet._call_tally(self, api_endpoint, *args, **kwargs)
     47 if self.use_futures:
     48     # Returns the uid of the operation.
     49     return self.call_use_future(api_endpoint, kwargs, files)
---> 50 response = self.tally.post_request('tally', api_endpoint, payload, files)
     51 if response.status_code == 404:
     52     return self._method_not_found_response(api_endpoint)

File ~/work/tally-client/tally-client/tally/decorators.py:107, in verify_token.<locals>.wrapper(*aargs, **kkwargs)
    105 @functools.wraps(func)
    106 def wrapper(*aargs, **kkwargs):
--> 107     result = func(*aargs, **kkwargs)
    108     if result.status_code == 401:
    109         result_dict = json.loads(result.content)

File ~/work/tally-client/tally-client/tally/tally.py:117, in Tally.post_request(self, resource, action, data, files)
    115     result = requests.post(request_path, headers=headers, data=data, files=files)
    116 else:
--> 117     result = requests.post(request_path, headers=self._get_headers(), data=json.dumps(data))
    118 return result

File /opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages/requests/api.py:115, in post(url, data, json, **kwargs)
    103 def post(url, data=None, json=None, **kwargs):
    104     r"""Sends a POST request.
    105 
    106     :param url: URL for the new :class:`Request` object.
   (...)
    112     :rtype: requests.Response
    113     """
--> 115     return request("post", url, data=data, json=json, **kwargs)

File /opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages/requests/api.py:59, in request(method, url, **kwargs)
     55 # By using the 'with' statement we are sure the session is closed, thus we
     56 # avoid leaving sockets open which can trigger a ResourceWarning in some
     57 # cases, and look like a memory leak in others.
     58 with sessions.Session() as session:
---> 59     return session.request(method=method, url=url, **kwargs)

File /opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages/requests/sessions.py:589, in Session.request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
    584 send_kwargs = {
    585     "timeout": timeout,
    586     "allow_redirects": allow_redirects,
    587 }
    588 send_kwargs.update(settings)
--> 589 resp = self.send(prep, **send_kwargs)
    591 return resp

File /opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages/requests/sessions.py:703, in Session.send(self, request, **kwargs)
    700 start = preferred_clock()
    702 # Send the request
--> 703 r = adapter.send(request, **kwargs)
    705 # Total elapsed time of the request (approximately)
    706 elapsed = preferred_clock() - start

File /opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages/requests/adapters.py:486, in HTTPAdapter.send(self, request, stream, timeout, verify, cert, proxies)
    483     timeout = TimeoutSauce(connect=timeout, read=timeout)
    485 try:
--> 486     resp = conn.urlopen(
    487         method=request.method,
    488         url=url,
    489         body=request.body,
    490         headers=request.headers,
    491         redirect=False,
    492         assert_same_host=False,
    493         preload_content=False,
    494         decode_content=False,
    495         retries=self.max_retries,
    496         timeout=timeout,
    497         chunked=chunked,
    498     )
    500 except (ProtocolError, OSError) as err:
    501     raise ConnectionError(err, request=request)

File /opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages/urllib3/connectionpool.py:790, in HTTPConnectionPool.urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, preload_content, decode_content, **response_kw)
    787 response_conn = conn if not release_conn else None
    789 # Make the request on the HTTPConnection object
--> 790 response = self._make_request(
    791     conn,
    792     method,
    793     url,
    794     timeout=timeout_obj,
    795     body=body,
    796     headers=headers,
    797     chunked=chunked,
    798     retries=retries,
    799     response_conn=response_conn,
    800     preload_content=preload_content,
    801     decode_content=decode_content,
    802     **response_kw,
    803 )
    805 # Everything went great!
    806 clean_exit = True

File /opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages/urllib3/connectionpool.py:536, in HTTPConnectionPool._make_request(self, conn, method, url, body, headers, retries, timeout, chunked, response_conn, preload_content, decode_content, enforce_content_length)
    534 # Receive the response from the server
    535 try:
--> 536     response = conn.getresponse()
    537 except (BaseSSLError, OSError) as e:
    538     self._raise_timeout(err=e, url=url, timeout_value=read_timeout)

File /opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/site-packages/urllib3/connection.py:454, in HTTPConnection.getresponse(self)
    451 from .response import HTTPResponse
    453 # Get the response from http.client.HTTPConnection
--> 454 httplib_response = super().getresponse()
    456 try:
    457     assert_header_parsing(httplib_response.msg)

File /opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/http/client.py:1377, in HTTPConnection.getresponse(self)
   1375 try:
   1376     try:
-> 1377         response.begin()
   1378     except ConnectionError:
   1379         self.close()

File /opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/http/client.py:320, in HTTPResponse.begin(self)
    318 # read until we get a non-100 response
    319 while True:
--> 320     version, status, reason = self._read_status()
    321     if status != CONTINUE:
    322         break

File /opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/http/client.py:281, in HTTPResponse._read_status(self)
    280 def _read_status(self):
--> 281     line = str(self.fp.readline(_MAXLINE + 1), "iso-8859-1")
    282     if len(line) > _MAXLINE:
    283         raise LineTooLong("status line")

File /opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/socket.py:704, in SocketIO.readinto(self, b)
    702 while True:
    703     try:
--> 704         return self._sock.recv_into(b)
    705     except timeout:
    706         self._timeout_occurred = True

File /opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/ssl.py:1242, in SSLSocket.recv_into(self, buffer, nbytes, flags)
   1238     if flags != 0:
   1239         raise ValueError(
   1240           "non-zero flags not allowed in calls to recv_into() on %s" %
   1241           self.__class__)
-> 1242     return self.read(nbytes, buffer)
   1243 else:
   1244     return super().recv_into(buffer, nbytes, flags)

File /opt/hostedtoolcache/Python/3.9.17/x64/lib/python3.9/ssl.py:1100, in SSLSocket.read(self, len, buffer)
   1098 try:
   1099     if buffer is not None:
-> 1100         return self._sslobj.read(len, buffer)
   1101     else:
   1102         return self._sslobj.read(len)

KeyboardInterrupt: 
# how regularly do you exercise?
dataset.meta(variable='q2b')

Building the logic#

We now build the logic for our new variable. Code 1 will represent “regular sporties, mainly into yoga, aerobics or pilates”, and code 2 will cover everyone else: non-sporties and those whose main sport is not one of the above. We use the Tally logical system as documented in the API.

cond_map = [
    (
        1, 
        "Regular sporties, mainly into yoga, aerobics or pilates", 
        { "$intersection": [{"q1":[4, 5, 6]}, {"q2b":[1]}] }
    ),
    (
        2, 
        "Non sporties, main activity not yoga, aerobics, pilates", 
        {"$union":
            [
                {"$not_any":{"q2b":[1]}},
                {"$not_any":{"q1":[4,5,6]}}
            ]
        }
    )
]

Creating the derived variable#

Using our new logic, we create the variable with the derive method (for more details, see the API on derive).

dataset.derive(
    name='active_aer_yoga_pilates', 
    label='Active sporties into aerobics, yoga or pilates', 
    cond_maps=cond_map, 
    qtype="single"
)

Once we have created the new variable, we can sanity check it.

dataset.crosstab(x='active_aer_yoga_pilates', ci=['counts', 'c%'])