TensorFlow
Feature Engineering
Scale to large datasets, find good features, preprocess with Cloud ML Engine
Raw Data to Features

What makes a good feature?
Related to the objective
Known at prediction time
Numeric with a meaningful magnitude
Has enough examples
Brings human insight to the problem

TensorFlow
.https://www.tensorflow.org/tutorials/text/word_embeddings
How to write and deploy a model with TensorFlow
.https://www.qwiklabs.com/focuses/3389 - Distributed Machine Learning with Google Cloud ML

Embedding function
# assumes TF 1.x with: import numpy as np, import tensorflow as tf,
# plus tflayers/tflearn as the feature-column and estimator aliases used throughout
def create_embed(sparse_col):
    dim = 10   # default embedding dimension
    if hasattr(sparse_col, 'bucket_size'):
        nbins = sparse_col.bucket_size
        if nbins is not None:
            # heuristic: embedding dimension grows with log2 of the number of buckets
            dim = 1 + int(round(np.log2(nbins)))
    return tflayers.embedding_column(sparse_col, dimension=dim)
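A quick check of the log2 heuristic (plain Python; numpy as np assumed):

nbins = 1000                           # e.g. a column hashed into 1,000 buckets
dim = 1 + int(round(np.log2(nbins)))   # log2(1000) ~ 9.97 -> 10 -> dim = 11
print(dim)                             # 11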
DNN model
def dnn_model(output_dir):
    real, sparse = get_features()
    all = {}
    all.update(real)
    # create embeddings of the sparse columns
    embed = {
        colname: create_embed(col)
        for colname, col in sparse.items()
    }
    all.update(embed)
    estimator = tflearn.DNNClassifier(
        model_dir=output_dir,
        feature_columns=all.values(),
        hidden_units=[64, 16, 4])
    # report RMSE as an extra evaluation metric
    estimator = tf.contrib.estimator.add_metrics(estimator, my_rmse)
    return estimator
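Both models call get_features(), which is not reproduced in these notes. A hypothetical sketch of what it might return for the flights data, with column names taken from the prediction request later on this page (the carrier key list is an illustrative subset):

def get_features():
    # real-valued input columns (names from the prediction request below)
    real = {
        colname: tflayers.real_valued_column(colname)
        for colname in ['dep_delay', 'taxiout', 'distance',
                        'avg_dep_delay', 'avg_arr_delay',
                        'dep_lat', 'dep_lon', 'arr_lat', 'arr_lon']
    }
    # sparse/categorical input columns
    sparse = {
        'carrier': tflayers.sparse_column_with_keys(
            'carrier', keys=['AS', 'AA', 'DL', 'UA', 'WN']),  # subset for illustration
        'origin': tflayers.sparse_column_with_hash_bucket(
            'origin', hash_bucket_size=1000),
        'dest': tflayers.sparse_column_with_hash_bucket(
            'dest', hash_bucket_size=1000),
    }
    return real, sparse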
Logs
Info     2021-02-25 11:57:47.592 IST  ps-replica-1      "Cancellation requested for RunGraph."
Info     2021-02-25 11:57:47.630 IST  worker-replica-2  "loss = 169.66252, step = 1002"
Info     2021-02-25 11:57:47.674 IST  worker-replica-2  "Loss for final step: 169.66252."
Info     2021-02-25 11:57:47.837 IST  worker-replica-1  "loss = 169.63147, step = 1002"
Info     2021-02-25 11:57:47.868 IST  worker-replica-2  "model dir gs://qwiklabs-gcp-00-dfadb4cd0b7b/flights/chapter9/output"
Info     2021-02-25 11:57:47.881 IST  worker-replica-1  "Loss for final step: 169.63147."
Info     2021-02-25 11:57:47.898 IST  worker-replica-2  "Module completed; cleaning up."
Info     2021-02-25 11:57:47.898 IST  worker-replica-2  "Clean up finished."
Info     2021-02-25 11:57:47.899 IST  worker-replica-2  "Task completed successfully."
Warning  2021-02-25 11:57:47.948 IST  worker-replica-0  "Training with estimator made no steps. Perhaps input is empty or misspecified."
Info     2021-02-25 11:57:47.949 IST  worker-replica-0  "Loss for final step: None."
Info     2021-02-25 11:57:48.080 IST  worker-replica-1  "model dir gs://qwiklabs-gcp-00-dfadb4cd0b7b/flights/chapter9/output"
Wide and Deep model
Extend the model with additional features via feature crossing: create features that associate airports with broad geographic zones, and from those derive simplified air-traffic corridors.
You start by creating location buckets for an n*n grid covering the USA, then assign each departure and arrival airport to its grid cell.
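Before the model code, a toy NumPy illustration of the grid geometry (np.digitize stands in for what bucketized_column does; note that crossed_column actually hashes the bucket pair into nbuckets*nbuckets hash buckets rather than computing a direct index like this):

import numpy as np

nbuckets = 5
latbuckets = np.linspace(20.0, 50.0, nbuckets)    # 20, 27.5, 35, 42.5, 50
lonbuckets = np.linspace(-120.0, -70.0, nbuckets)

dep_lat, dep_lon = 33.64, -84.43                  # e.g. Atlanta
lat_bin = int(np.digitize(dep_lat, latbuckets))   # -> 2
lon_bin = int(np.digitize(dep_lon, lonbuckets))   # -> 3
cell = lat_bin * nbuckets + lon_bin               # idealized grid-cell id -> 13
print(lat_bin, lon_bin, cell)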

def parse_hidden_units(s):
    return [int(item) for item in s.split(',')]

def wide_and_deep_model(output_dir, nbuckets=5,
                        hidden_units='64,32', learning_rate=0.01):
    real, sparse = get_features()
    # lat/lon cols can be discretized to "air traffic corridors"
    latbuckets = np.linspace(20.0, 50.0, nbuckets).tolist()
    lonbuckets = np.linspace(-120.0, -70.0, nbuckets).tolist()
    disc = {}
    disc.update({
        'd_{}'.format(key): tflayers.bucketized_column(real[key], latbuckets)
        for key in ['dep_lat', 'arr_lat']
    })
    disc.update({
        'd_{}'.format(key): tflayers.bucketized_column(real[key], lonbuckets)
        for key in ['dep_lon', 'arr_lon']
    })
    # cross columns that make sense in combination
    sparse['dep_loc'] = tflayers.crossed_column(
        [disc['d_dep_lat'], disc['d_dep_lon']], nbuckets * nbuckets)
    sparse['arr_loc'] = tflayers.crossed_column(
        [disc['d_arr_lat'], disc['d_arr_lon']], nbuckets * nbuckets)
    sparse['dep_arr'] = tflayers.crossed_column(
        [sparse['dep_loc'], sparse['arr_loc']], nbuckets ** 4)
    sparse['ori_dest'] = tflayers.crossed_column(
        [sparse['origin'], sparse['dest']], hash_bucket_size=1000)
    # create embeddings of all the sparse columns
    embed = {
        colname: create_embed(col)
        for colname, col in sparse.items()
    }
    real.update(embed)
    # optional: explicit optimizers for the two sides of the model
    # lin_opt = tf.train.FtrlOptimizer(learning_rate=learning_rate)
    # l_rate = learning_rate * 0.25
    # dnn_opt = tf.train.AdagradOptimizer(learning_rate=l_rate)
    estimator = tflearn.DNNLinearCombinedClassifier(
        model_dir=output_dir,
        linear_feature_columns=sparse.values(),
        dnn_feature_columns=real.values(),
        dnn_hidden_units=parse_hidden_units(hidden_units))
        # linear_optimizer=lin_opt,
        # dnn_optimizer=dnn_opt)
    # report RMSE as an extra evaluation metric
    estimator = tf.contrib.estimator.add_metrics(estimator, my_rmse)
    return estimator
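Note the split that gives the model its name: the sparse columns and their crosses feed the linear ("wide") side, which memorizes specific corridor and origin-dest combinations, while the real-valued columns plus the embeddings feed the DNN ("deep") side, which generalizes. The commented-out FTRL and Adagrad optimizers are in fact the estimator's defaults for those two sides, so uncommenting them matters only if you want to control the learning rates.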

Submit the training job to AI Platform
$ export JOBNAME=learn_rate_flights_$(date -u +%y%m%d_%H%M%S)
$ gcloud ai-platform jobs submit training $JOBNAME \
--module-name=trainer.task \
--package-path=$(pwd)/flights/trainer \
--job-dir=$OUTPUT_DIR \
--staging-bucket=gs://$BUCKET \
--region=$REGION \
--scale-tier=STANDARD_1 \
--runtime-version=1.15 \
-- \
--output_dir=$OUTPUT_DIR \
--traindata $DATA_DIR/train* \
--evaldata $DATA_DIR/test*
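This command assumes BUCKET, REGION, OUTPUT_DIR, and DATA_DIR are already exported. --scale-tier=STANDARD_1 provisions a small distributed cluster (a master plus several workers and parameter servers), which is why the training logs above come from worker-replica-N and ps-replica-N instances. Everything after the bare -- is passed through to trainer.task rather than parsed by gcloud.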
Model deployment and usage
$ gcloud ai-platform versions create v1 --model flights \
> --origin ${MODEL_LOCATION} \
> --runtime-version 1.15 \
> --region global
# pip install --upgrade google-api-python-client
# pip install --upgrade oauth2client
>>> from googleapiclient import discovery
>>> from oauth2client.client import GoogleCredentials
>>> import os
>>> import json
>>> credentials = GoogleCredentials.get_application_default()
>>> api = discovery.build('ml', 'v1', credentials=credentials,
... discoveryServiceUrl=
... 'https://storage.googleapis.com/cloud-ml/discovery/ml_v1_discovery.json')
>>> PROJECT = 'qwiklabs-gcp-00-dfadb4cd0b7b'
>>> parent = 'projects/%s/models/%s/versions/%s' % (PROJECT, 'flights', 'v1')
>>> request_data = {'instances':
... [
... {
... 'dep_delay': 16.0,
... 'taxiout': 13.0,
... 'distance': 160.0,
... 'avg_dep_delay': 13.34,
... 'avg_arr_delay': 67.0,
... 'carrier': 'AS',
... 'dep_lat': 61.17,
... 'dep_lon': -150.00,
... 'arr_lat': 60.49,
... 'arr_lon': -145.48,
... 'origin': 'ANC',
... 'dest': 'CDV'
... }
... ]
... }
>>> response = api.projects().predict(body=request_data, name=parent).execute()
>>> print ("response={0}".format(response))
response={u'predictions': [{u'all_class_ids': [0, 1], u'all_classes': [u'0', u'1'], u'probabilities': [0.9874195456504822, 0.01258043386042118], u'classes': [u'0'], u'logistic': [0.012580434791743755], u'logits': [-4.36295223236084], u'class_ids': [0]}]}
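To pull the probabilities out of the response, pair all_classes with probabilities (continuing the same session; which class means "on time" depends on how the training label was encoded):
>>> pred = response['predictions'][0]
>>> dict(zip(pred['all_classes'], pred['probabilities']))
{u'0': 0.9874195456504822, u'1': 0.01258043386042118}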
TensorFlow for Poets
.https://github.com/googlecodelabs/tensorflow-for-poets-2/blob/master/scripts/label_image.py
python -m scripts.retrain \
--bottleneck_dir=tf_files/bottlenecks \
--how_many_training_steps=500 \
--model_dir=tf_files/models/ \
--summaries_dir=tf_files/training_summaries/"${ARCHITECTURE}" \
--output_graph=tf_files/retrained_graph.pb \
--output_labels=tf_files/retrained_labels.txt \
--architecture="${ARCHITECTURE}" \
--image_dir=tf_files/flower_photos
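Once retraining finishes, the linked label_image.py script can classify a single photo against the retrained graph (flag names per that script; the image path below is just a placeholder, point it at any test image):

python -m scripts.label_image \
    --graph=tf_files/retrained_graph.pb \
    --labels=tf_files/retrained_labels.txt \
    --image=tf_files/flower_photos/daisy/sample.jpg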




[Figure: F1 score]
