Parse a text file uploaded in S3 and add its content to DynamoDB

We are going to read a text file as soon as it is uploaded to S3 and add its data into DynamoDB using Lambda – Node JS.

Things we will need to do:-
1.       Create S3 bucket and Dynamodb Table.
2.       Create a role (S3 access and dynamodb access) for the lambda function that you will be creating.
3.       Create lambda function with S3 Object creation Event source.
4.       Test.

Before we start, just some names that you should know that I have used in the example -

S3 bucket Name: s3-to-lambda-object-creation-01

 Lambda Function Name: s3-dynamodb-lambda-02

Dynamodb Table Name: Movies

Dynamodb Table Attributes:
Key - year,
Other Attribute - title.


Step 1:-
Before we dive into how to use lambda functions and their details, we need to have an S3 bucket and a Dynamodb Table in place for the current problem statement.
Creating these two is pretty straightforward. You can refer to the links below :-
S3 :-

Dynamodb :-

Step 2:-
Before you start creating a lambda function, you need to assign it an Execution Role so that lambda has the permission to access S3 and Dynamodb and make changes to them.
We will be giving S3 Access and Dynamodb Access.
We are also going to give it some policies to be able to write logs in CloudWatch.

The policy is -

{
    "Version": "2012-10-17",
    "Statement": [
        {
            "Sid": "Stmt146834439900",
            "Effect": "Allow",
            "Action": [
                "dynamodb:PutItem"
            ],
            "Resource": [
                "arn:aws:dynamodb:us-east-1:64236701746:table/Movies"    -------> Table ARN
            ]
        },
        {
            "Sid": "Stmt146834444000",
            "Effect": "Allow",
            "Action": [
                "logs:CreateLogGroup",
                "logs:CreateLogStream",
                "logs:PutLogEvents"
            ],
            "Resource": [
                "arn:aws:logs:*:*:*"
            ]
        },
        {
            "Sid": "Stmt146834847000",
            "Effect": "Allow",
            "Action": [
                "s3:GetObject"
            ],
            "Resource": [
                "arn:aws:s3:::<bucket_name>/*"
            ]
        }
    ]
}

Step 3:-
Now that you have your role in place, we can create the lambda function -

Step 3.1 -
Select Blueprint - You can skip this step since we are going to write our own custom code.

Step 3.2 -

Configure triggers -


Select S3 from the drop-down list.

After selecting S3 you will be asked to fill in the bucket name and other details -


Make sure you select Object Created option for event type.
You can enable the trigger from now itself or you can enable it later too after creating the function.

Step 3.3 -
Configure function - Now here we will be defining the function, including writing its code.


Below the code entry type - you will have to insert your code.

Code -

'use strict';
console.log('Loading function');

let aws = require('aws-sdk');
let s3 = new aws.S3({ apiVersion: '2006-03-01' });

exports.handler = (event, context, callback) => {
    const bucket = event.Records[0].s3.bucket.name;
    const key = decodeURIComponent(event.Records[0].s3.object.key.replace(/\+/g, ' '));
    const params = {
        Bucket: bucket,
        Key: key
    };
    var docClient = new aws.DynamoDB.DocumentClient();
    var table = "Movies";
  

    s3.getObject(params, (err, data) => {
        if (err) {
            console.log(err);
            const message = `Error getting object ${key} from bucket ${bucket}. Make sure they exist and your bucket is in the same region as this function.`;
            console.log(message);
            callback(message);
        } else {
        
            //get data in the file using data.Body
            var file_data=String(data.Body);

            // split the data first based on the newline character and add each line into an array
            var split_data_newline= file_data.split("\n");
            var final_split_data= []; // 2D array with final split data from file
            var temp_comma_split= []; // Temp array for storing "," seperated values of each line
            var i,j=0;
          
           // interate through the array which contains full full lines as it's element.. now take each element of that array and split using ","
           // take the "," seperated items into a temp array (temp_comma_split)
           // using the second inner for loop.. iterate the temp array and add each element of it into the 2D array (final_split_data)

            for(i=0; i<split_data_newline.length; i++) {
                final_split_data[i]= [];
                temp_comma_split = split_data_newline[i].split(",");
           
                for(j=0; j<temp_comma_split.length; j++) {
                    final_split_data[i][j]=temp_comma_split[j]; 
               }
            }
    

            // Iterate through the final 2D array with data to add into DB fields

            for(i=0; i<final_split_data.length; i++) {
                var year = final_split_data[i][0];
                var title = final_split_data[i][1];

                var DB_params = {
                    TableName:table,
                    Item:{
                        "year": year,
                        "title": title,
                    }
                };

                console.log("Adding a new item...");
                docClient.put(DB_params, function(err, data) {
                    if (err) {
                        console.error("Unable to add item. Error JSON:", JSON.stringify(err, null, 2));
                    } else {
                        console.log("Added item:", JSON.stringify(data, null, 2));
                    }
                });
            }
        }
    });
};




Code assumption that the input file will be something like this -
2011,baghban
2012,dhoom


After the code, AWS will pre-populate the Handler field for you. It is the filename.handler-function.
It is a function in your code that AWS Lambda can invoke when the service executes your code.

Next it will ask you for a role to be attached, simply choose the role you created in step 2 here.
The remaining settings you can leave as they are, since our use-case doesn’t require any special configuration.

Step 3.4 -

Review - Review your function and hit Create Function button.

 Once done you should see something like this under your Triggers Tab -


Step 4:-

For Testing the setup -
Take the input file shown below and upload it to your S3 bucket.
You should see the two rows getting added in your Dynamodb Table.
Also go to your CloudWatch Logs - you should see a new Log Group created with the same name as your Lambda Function. Inside the group you can see all your logs getting accumulated.

Logs should look something like this- 


Launch an EC2 instance for every new row added into DynamoDB Table using Lambda

Problem Statement:- As soon as a row gets added to the Dynamodb Table, launch an EC2 machine and use the Key column value of the table as the tag name for the instance.
Solution:-
Things we will need to do:-
1.       Create the Dynamodb Table.
2.       Create a role (EC2 access and dynamodb access) for the lambda function that you will be creating.
3.       Create lambda function with Dynamodb Trigger.
4.       Test.
Before we start, just some names that you should know that I have used in the example -
Lambda Function Name: Dynamo_Trigger_Ec2_Function_v2
Dynamodb Table Name: Dynamo_Trigger
Dynamodb Table Attributes:
Key - Item_Id
Step 1:-
Before we dive into how to use lambda functions and their details, we need to have a Dynamodb Table in place for the current problem statement.
Creating a table is pretty straightforward. You can refer to the links below :-
Dynamodb :-

Step 2:-
Before you start creating a lambda function, you need to assign it an Execution Role so that lambda has the permission to access Dynamodb and EC2.
We will be giving EC2 Access and Dynamodb Access.
We are also going to give it some policies to be able to write logs in CloudWatch.


The policy is -
{
    "Version": "2012-10-17",
    "Statement": [
        {
            "Sid": "Stmt146917013000",
            "Effect": "Allow",
            "Action": [
                "ec2:CreateTags",
                "ec2:RunInstances"
            ],
            "Resource": [
                "*"
            ]
        },
        {
            "Sid": "Stmt149170813000",
            "Effect": "Allow",
            "Action": [
                "dynamodb:GetItem",
                "dynamodb:GetRecords"
            ],
            "Resource": [
                "arn:aws:dynamodb:us-east-1:64237017496:table/Dynamo_Trigger"
            ]
        },
        {
            "Sid": "Stmt146917088600",
            "Effect": "Allow",
            "Action": [
                "logs:CreateLogGroup",
                "logs:CreateLogStream",
                "logs:PutLogEvents"
            ],
            "Resource": [
                "arn:aws:logs:*:*:*"
            ]
        }
    ]
}

Step 3:-
Now that you have your role in place, we can create the lambda function -
Step 3.1 -
Select Blueprint - You can skip this step since we are going to write our own custom code.
Step 3.2 -
Configure triggers -

Select DynamoDB from the drop-down list.
After selecting DynamoDB you will be asked to fill in the table name and other details -

Make sure you select the right table for the trigger.
You can enable the trigger from now itself or you can enable it later too after creating the function.
Step 3.3 -
Configure function - Now here we will be defining the function, including writing its code.

Below the code entry type - you will have to insert your code.
Code -
'use strict';
console.log('Loading function');

exports.handler = (event, context, callback) => {
    //console.log('Received event:', JSON.stringify(event, null, 2));
    //var docClient = new AWS.DynamoDB.DocumentClient();
    var tag_name;
    var event_name;
    var AWS = require('aws-sdk');
    AWS.config.region = 'us-east-1';
    var ec2 = new AWS.EC2();
    var params = {
        ImageId: 'ami-a4827dc9', /* Use this AMI ID for now */
        MaxCount: 1,
        MinCount: 1,
                DryRun: false,
                EbsOptimized: false,
                InstanceType: 't2.micro',
                KeyName: 'US-EAST-CLOUDAPI-LB-key',
        Monitoring: {
                Enabled: false
        },
        Placement: {
                Tenancy: 'default'
        },
        SecurityGroupIds: [
                'sg-436abe3b',
        ],
        SubnetId: 'subnet-5877fa2e',
    };

    event.Records.forEach((record) => {
        console.log(record.eventID);
        event_name = record.eventName;

        switch(event_name){

            case "INSERT":
                console.log('DynamoDB Record: %j', record.dynamodb);
                tag_name = record.dynamodb.Keys.Item_Id.S; // adding instance tag as the same key column value from the db.
                console.log(tag_name);

                // Create the instance
                ec2.runInstances(params, function(err, data) {
                    if (err) { console.log("Could not create instance", err); return; }

                    var instanceId = data.Instances[0].InstanceId;
                    console.log("Created instance", instanceId);

                    // Add tags to the instance
                    params = {Resources: [instanceId], Tags: [
                        {Key: 'Name', Value: tag_name}
                    ]};
                    ec2.createTags(params, function(err) {
                        console.log("Tagging instance", err ? "failure" : "success");
                    });
                });

                break;

            case "REMOVE":
                console.log('DynamoDB Record: %j', record.dynamodb);
                //console.log('key value: %j', record.dynamodb.Keys.Item_Id.S);
                console.log("Since it's a REMOVE so not doing anything.");

        }
       
    });
    callback(null, `Successfully processed ${event.Records.length} records.`);
  

};



After the code, AWS will pre-populate the Handler field for you. It is the filename.handler-function.
It is a function in your code that AWS Lambda can invoke when the service executes your code.

Next it will ask you for a role to be attached, simply choose the role you created in step 2 here.
The remaining settings you can leave as they are, since our use-case doesn’t require any special configuration.
Step 3.4 -
Review - Review your function and hit Create Function button.
Once done you should see something like this under your Triggers Tab -

Step 4:-
For Testing the setup -
Add a row in your database..
You should see an EC2 machine getting launched with the tag same as the Key Column value in your database.
Like in the example here, I added Item_Id = dummy, so in my EC2 console I can see an instance getting launched with the tag “dummy”.
Also go to your CloudWatch Logs - you should see a new Log Group created with the same name as your Lambda Function. Inside the group you can see all your logs getting accumulated.
Logs should look something like this-