# TensorFlow
Feature Engineering


## DNN model

```python
import numpy as np
import tensorflow as tf
import tensorflow.contrib.layers as tflayers  # assumed alias: TF 1.x contrib feature columns
import tensorflow.contrib.learn as tflearn    # assumed alias: TF 1.x contrib estimators

def create_embed(sparse_col):
    dim = 10  # default embedding dimension
    if hasattr(sparse_col, 'bucket_size'):
        nbins = sparse_col.bucket_size
        if nbins is not None:
            # rule of thumb: grow the embedding dimension with log2(vocabulary size)
            dim = 1 + int(round(np.log2(nbins)))
    return tflayers.embedding_column(sparse_col, dimension=dim)

def dnn_model(output_dir):
    real, sparse = get_features()  # get_features() is defined elsewhere in the trainer package
    all = {}
    all.update(real)
    # create embeddings of the sparse columns
    embed = {
        colname: create_embed(col)
        for colname, col in sparse.items()
    }
    all.update(embed)
    estimator = tflearn.DNNClassifier(
        model_dir=output_dir,
        feature_columns=all.values(),
        hidden_units=[64, 16, 4])
    # my_rmse is a custom metric_fn defined elsewhere in the trainer package
    estimator = tf.contrib.estimator.add_metrics(estimator, my_rmse)
    return estimator
```
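As a quick check of the embedding-dimension heuristic in `create_embed` (plain arithmetic, not from the original code):

```python
import numpy as np

# dim = 1 + round(log2(nbins)): the embedding width grows slowly with vocabulary size
for nbins in [10, 100, 1000, 10000]:
    print(nbins, '->', 1 + int(round(np.log2(nbins))))
# 10 -> 4, 100 -> 8, 1000 -> 11, 10000 -> 14
```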
## Logs

| Severity | Timestamp | Replica | Message |
| --- | --- | --- | --- |
| Info | 2021-02-25 11:57:47.592 IST | ps-replica-1 | "Cancellation requested for RunGraph." |
| Info | 2021-02-25 11:57:47.630 IST | worker-replica-2 | "loss = 169.66252, step = 1002" |
| Info | 2021-02-25 11:57:47.674 IST | worker-replica-2 | "Loss for final step: 169.66252." |
| Info | 2021-02-25 11:57:47.837 IST | worker-replica-1 | "loss = 169.63147, step = 1002" |
| Info | 2021-02-25 11:57:47.868 IST | worker-replica-2 | "model dir gs://qwiklabs-gcp-00-dfadb4cd0b7b/flights/chapter9/output" |
| Info | 2021-02-25 11:57:47.881 IST | worker-replica-1 | "Loss for final step: 169.63147." |
| Info | 2021-02-25 11:57:47.898 IST | worker-replica-2 | "Module completed; cleaning up." |
| Info | 2021-02-25 11:57:47.898 IST | worker-replica-2 | "Clean up finished." |
| Info | 2021-02-25 11:57:47.899 IST | worker-replica-2 | "Task completed successfully." |
| Warning | 2021-02-25 11:57:47.948 IST | worker-replica-0 | "Training with estimator made no steps. Perhaps input is empty or misspecified." |
| Info | 2021-02-25 11:57:47.949 IST | worker-replica-0 | "Loss for final step: None." |
| Info | 2021-02-25 11:57:48.080 IST | worker-replica-1 | "model dir gs://qwiklabs-gcp-00-dfadb4cd0b7b/flights/chapter9/output" |

Note the warning from worker-replica-0: its estimator ran zero steps, which, as the message itself suggests, usually means that worker's input was empty or misspecified.

## Wide and Deep model

```python
def parse_hidden_units(s):
    return [int(item) for item in s.split(',')]

def wide_and_deep_model(output_dir, nbuckets=5,
                        hidden_units='64,32', learning_rate=0.01):
    real, sparse = get_features()
    # lat/lon cols can be discretized to "air traffic corridors"
    latbuckets = np.linspace(20.0, 50.0, nbuckets).tolist()
    lonbuckets = np.linspace(-120.0, -70.0, nbuckets).tolist()
    disc = {}
    disc.update({
        'd_{}'.format(key): tflayers.bucketized_column(real[key], latbuckets)
        for key in ['dep_lat', 'arr_lat']
    })
    disc.update({
        'd_{}'.format(key): tflayers.bucketized_column(real[key], lonbuckets)
        for key in ['dep_lon', 'arr_lon']
    })
    # cross columns that make sense in combination
    sparse['dep_loc'] = tflayers.crossed_column(
        [disc['d_dep_lat'], disc['d_dep_lon']], nbuckets * nbuckets)
    sparse['arr_loc'] = tflayers.crossed_column(
        [disc['d_arr_lat'], disc['d_arr_lon']], nbuckets * nbuckets)
    sparse['dep_arr'] = tflayers.crossed_column(
        [sparse['dep_loc'], sparse['arr_loc']], nbuckets ** 4)
    sparse['ori_dest'] = tflayers.crossed_column(
        [sparse['origin'], sparse['dest']], hash_bucket_size=1000)
    # create embeddings of all the sparse columns
    embed = {
        colname: create_embed(col)
        for colname, col in sparse.items()
    }
    real.update(embed)
    # lin_opt = tf.train.FtrlOptimizer(learning_rate=learning_rate)
    # l_rate = learning_rate * 0.25
    # dnn_opt = tf.train.AdagradOptimizer(learning_rate=l_rate)
    estimator = tflearn.DNNLinearCombinedClassifier(
        model_dir=output_dir,
        linear_feature_columns=sparse.values(),  # wide side: sparse and crossed columns
        dnn_feature_columns=real.values(),       # deep side: real-valued and embedded columns
        dnn_hidden_units=parse_hidden_units(hidden_units))
        # linear_optimizer=lin_opt,
        # dnn_optimizer=dnn_opt)
    estimator = tf.contrib.estimator.add_metrics(estimator, my_rmse)
    return estimator
```
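To make the "air traffic corridors" idea concrete, here is a NumPy-only sketch (an illustration, not part of the model code) of what the `nbuckets=5` latitude boundaries look like and which bin a sample coordinate falls into:

```python
import numpy as np

nbuckets = 5
latbuckets = np.linspace(20.0, 50.0, nbuckets).tolist()
print(latbuckets)                      # [20.0, 27.5, 35.0, 42.5, 50.0]
print(np.digitize(33.94, latbuckets))  # an LA-area latitude lands in bin 2
```

Crossing the bucketized departure latitude and longitude then hashes each (lat bin, lon bin) pair into one of `nbuckets * nbuckets` corridor ids.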
The training job is submitted to Cloud AI Platform:

```bash
$ export JOBNAME=learn_rate_flights_$(date -u +%y%m%d_%H%M%S)
$ gcloud ai-platform jobs submit training $JOBNAME \
--module-name=trainer.task \
--package-path=$(pwd)/flights/trainer \
--job-dir=$OUTPUT_DIR \
--staging-bucket=gs://$BUCKET \
--region=$REGION \
--scale-tier=STANDARD_1 \
--runtime-version=1.15 \
-- \
--output_dir=$OUTPUT_DIR \
--traindata $DATA_DIR/train* \
--evaldata $DATA_DIR/test*
```

The trained model is then deployed as a version of the `flights` model:

```bash
$ gcloud ai-platform versions create v1 --model flights \
> --origin ${MODEL_LOCATION} \
> --runtime-version 1.15 \
> --region global
```

With the version deployed, the model can be invoked from Python through the Google API client library:

```python
# pip install --upgrade google-api-python-client
# pip install --upgrade oauth2client
>>> from googleapiclient import discovery
>>> from oauth2client.client import GoogleCredentials
>>> import os
>>> import json
>>> credentials = GoogleCredentials.get_application_default()
>>> api = discovery.build('ml', 'v1', credentials=credentials,
... discoveryServiceUrl=
... 'https://storage.googleapis.com/cloud-ml/discovery/ml_v1_discovery.json')
>>> PROJECT = 'qwiklabs-gcp-00-dfadb4cd0b7b'
>>> parent = 'projects/%s/models/%s/versions/%s' % (PROJECT, 'flights', 'v1')
>>> request_data = {'instances':
... [
... {
... 'dep_delay': 16.0,
... 'taxiout': 13.0,
... 'distance': 160.0,
... 'avg_dep_delay': 13.34,
... 'avg_arr_delay': 67.0,
... 'carrier': 'AS',
... 'dep_lat': 61.17,
... 'dep_lon': -150.00,
... 'arr_lat': 60.49,
... 'arr_lon': -145.48,
... 'origin': 'ANC',
... 'dest': 'CDV'
... }
... ]
... }
>>> response = api.projects().predict(body=request_data, name=parent).execute()
>>> print ("response={0}".format(response))
response={u'predictions': [{u'all_class_ids': [0, 1], u'all_classes': [u'0', u'1'], u'probabilities': [0.9874195456504822, 0.01258043386042118], u'classes': [u'0'], u'logistic': [0.012580434791743755], u'logits': [-4.36295223236084], u'class_ids': [0]}]}
```
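The field of interest in that response is `probabilities`. Continuing the same session, a short illustrative snippet to pull out the predicted class and its probability:

```python
>>> pred = response['predictions'][0]
>>> print('class={}, p={:.4f}'.format(pred['classes'][0], max(pred['probabilities'])))
class=0, p=0.9874
```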
## TensorFlow For Poets

```bash
python -m scripts.retrain \
--bottleneck_dir=tf_files/bottlenecks \
--how_many_training_steps=500 \
--model_dir=tf_files/models/ \
--summaries_dir=tf_files/training_summaries/"${ARCHITECTURE}" \
--output_graph=tf_files/retrained_graph.pb \
--output_labels=tf_files/retrained_labels.txt \
--architecture="${ARCHITECTURE}" \
--image_dir=tf_files/flower_photos
```
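Once retraining finishes, the frozen graph and labels file can be used for inference. A minimal sketch (TF 1.x), with the caveat that the tensor names (`input`, `final_result`), the 224x224 input size, the pixel scaling, and the image path are assumptions that depend on the chosen `ARCHITECTURE` and your data:

```python
import numpy as np
import tensorflow as tf
from PIL import Image

# load the retrained graph produced by scripts.retrain
graph_def = tf.GraphDef()
with tf.gfile.GFile('tf_files/retrained_graph.pb', 'rb') as f:
    graph_def.ParseFromString(f.read())

labels = [line.strip() for line in open('tf_files/retrained_labels.txt')]

with tf.Graph().as_default() as graph:
    tf.import_graph_def(graph_def, name='')
    inp = graph.get_tensor_by_name('input:0')         # assumed input tensor name
    out = graph.get_tensor_by_name('final_result:0')  # assumed output tensor name

# preprocess one image; MobileNet variants typically expect inputs scaled to [-1, 1]
img = Image.open('tf_files/flower_photos/daisy/example.jpg').resize((224, 224))  # hypothetical path
x = (np.asarray(img, dtype=np.float32) / 127.5 - 1.0)[np.newaxis, ...]

with tf.Session(graph=graph) as sess:
    probs = sess.run(out, feed_dict={inp: x})[0]

for label, p in sorted(zip(labels, probs), key=lambda t: -t[1]):
    print('{:>10}: {:.3f}'.format(label, p))
```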