S3(Simple Storage Service)

Object Storage

Basically a nice HTTP filesystem as a service

Buckets that can be accessed over https with uri like https://s3.us-east-2.amazonaws.com/jsfuentes-test/JorgeFuentes_Nov2018.pdf

Can configure access, default private

Uses:

  • store files, images, videos, etc

  • could be used as a makeshift key-value store or a makeshift GitHub

  • can serve as static website hosting

Creation

Nice coursera Overview

Make a new bucket: give it a globally unique name and click create bucket. Super easy.

CLI

aws s3 sync . s3://mybucket

Publicly Accessible

Make the object public, then anyone can just follow the link to the object. Very easy.

Permissions->ACL to give public access

Permissions->CORS configuration: Might need CORS, unsure...

[
    {
        "AllowedHeaders": [
            "*"
        ],
        "AllowedMethods": [
            "GET"
        ],
        "AllowedOrigins": [
            "*"
        ],
        "ExposeHeaders": [],
        "MaxAgeSeconds": 3000
    }
]

Can create an endpoint to create signed urls to access specific resources

Static Website Hosting

  1. Set the bucket to publicly accessible in the bucket policy

  • policy type → S3

  • Effect → Allow

  • Principal → *

  • Actions → GetObject

  • Amazon Resource Name (ARN) → Copy the “arn” from the permissions tab and add /* or a path

    {
      "Id": "Policy1567137118824",
      "Version": "2012-10-17",
      "Statement": [
        {
          "Sid": "Stmt1567137117737",
          "Action": [
            "s3:GetObject"
          ],
          "Effect": "Allow",
          "Resource": "arn:aws:s3:::ecstatic-iframe-plugin/*",
          "Principal": "*"
        }
      ]
    }
  2. Go to the Properties tab of the bucket and turn on static website hosting; set both the index and error documents to index.html

  • You need index.html in EACH subfolder and path requests will route to that index.html

  • You will get a http url

  3. To set up HTTPS, use CloudFront; look at Cloudfront.md

  • ? You might be able to just use cloudfront and skip step 2?

Extra: Look at codebuild.md to get a built react app or github updates into the S3, use the AWS cli to get the artifact in the root

Advanced

MultiPart Upload

Can upload a file part by part and even in parallel, then it will be constructed when you signal completion

import AWS from "aws-sdk";
import bytes from "bytes";
const debug = require("debug")("app:Folder:AWSUploader");

// AWS region assigned to AWS.config.region when an AWSUploader is constructed.
const REGION = "us-west-2";
// Cognito identity pool id passed as IdentityPoolId to AWS.CognitoIdentityCredentials.
// NOTE(review): this looks like a placeholder value - replace with a real pool id.
const POOL_ID = "us-west-2:banabnbanabnbanbfnabnabnabnabna";
// Name of the bucket every AWSUploader instance uploads into.
const BUCKET_NAME = "sigma-direct";

/**
 * Buffers incoming blobs and sends them to S3 as the parts of a single
 * multipart upload.
 *
 * Usage: construct an instance, call `upload(blob)` for every chunk, then
 * finish with either `completeMultiUpload()` or `abortMultiUpload()`.
 */
export default class AWSUploader {
  /**
   * Configures the AWS SDK and immediately starts (without awaiting) the
   * request that creates the multipart upload.
   *
   * @param {string} filename - Object key to upload to; should be unique.
   * @param {string} type - MIME type, sent as ContentType and used for merged blobs.
   */
  constructor(filename, type) {
    this.bucketName = BUCKET_NAME; // audio file store
    this.etag = []; // { ETag, PartNumber } of every part uploaded so far
    this.partNumber = 0; // multipart requires incremental numbers so parts can be merged in ascending order
    this.filename = filename; // unique filename
    this.type = type;
    this.uploadId = ""; // upload id is required by every multipart call
    this.uploadPromises = [];
    this.curBlob = null; // data buffered until it is large enough to send as a part

    AWS.config.region = REGION;
    AWS.config.credentials = new AWS.CognitoIdentityCredentials({
      IdentityPoolId: POOL_ID
    });
    this.s3 = new AWS.S3();
    // make the start request now, but don't block
    this.initalizedP = this.startMultiUpload();
  }

  /**
   * Adapts a callback-style S3 client method to a Promise.
   *
   * @param {string} operation - Name of the method on `this.s3` to call.
   * @param {object} params - Parameters passed to that method.
   * @returns {Promise<object>} Resolves with the response data, rejects with the error.
   */
  callS3(operation, params) {
    return new Promise((resolve, reject) => {
      this.s3[operation](params, (err, data) => {
        if (err) {
          reject(err);
        } else {
          resolve(data);
        }
      });
    });
  }

  /**
   * Appends a blob to the buffer and, once the buffer is larger than 5mb,
   * uploads the buffer as the next part.
   *
   * Calls are serialized: each one waits for every earlier `upload` call, so
   * parts are numbered in the order the blobs were supplied.
   *
   * @param {Blob} blob - Next chunk of the file.
   * @returns {Promise<void>} Settles once the blob is buffered (and flushed if needed).
   */
  upload(blob) {
    const prevUploads = [...this.uploadPromises]; // needs to be a copy of the array in its previous state

    const f = async () => {
      await this.initalizedP; // make sure the start request happened
      await Promise.all(prevUploads); // ensure all previous uploads are done
      if (this.curBlob === null) {
        this.curBlob = blob;
      } else {
        this.curBlob = new Blob([this.curBlob, blob], { type: this.type });
      }

      debug("Currently", bytes(this.curBlob.size), "sends at 5mb");
      // S3 rejects non-final parts smaller than 5 MiB, so buffer until then.
      if (this.curBlob.size > bytes("5mb")) {
        const cb = this.curBlob;
        this.curBlob = null;
        await this.continueMultiUpload(cb);
      }
    };

    const uploadP = f();
    this.uploadPromises.push(uploadP);
    return uploadP;
  }

  /**
   * Initiates the multipart upload and stores the returned upload id, which
   * every later part/complete/abort request must carry.
   *
   * @returns {Promise<object>} The createMultipartUpload response.
   */
  async startMultiUpload() {
    debug("STARTING MULTIUPLOAD");
    const startParams = {
      Bucket: this.bucketName,
      Key: this.filename,
      ContentType: this.type,
      ACL: "private"
    };

    const data = await this.callS3("createMultipartUpload", startParams);
    debug("Created", data);
    this.uploadId = data.UploadId;
    return data;
  }

  /**
   * Uploads one part of the multipart upload and records its ETag, which is
   * needed later to complete the upload.
   *
   * @param {Blob} blob - Body of the part.
   * @returns {Promise<object>} The uploadPart response.
   */
  async continueMultiUpload(blob) {
    // Reserve the part number synchronously, before the first await.
    this.partNumber += 1;
    const curPartNumber = this.partNumber;
    const params = {
      Body: blob,
      Bucket: this.bucketName,
      Key: this.filename,
      PartNumber: curPartNumber,
      UploadId: this.uploadId
    };
    debug("Continuing upload with", params);

    const data = await this.callS3("uploadPart", params);
    debug("Uploaded part", curPartNumber);
    this.etag.push({
      ETag: data.ETag,
      PartNumber: curPartNumber
    });
    return data;
  }

  /**
   * Completes the multipart upload: waits for pending uploads, flushes any
   * buffered data as the final part, then asks S3 to assemble the parts.
   *
   * @returns {Promise<object>} The completeMultipartUpload response.
   */
  async completeMultiUpload() {
    // The upload id must exist even if upload() was never called.
    await this.initalizedP;
    // wait for all current uploads, then check if any blobs are left over
    await Promise.all(this.uploadPromises);
    debug("Leftover blobs", this.curBlob);
    if (this.curBlob) {
      // Clear the buffer first so the leftover can never be sent twice.
      const leftover = this.curBlob;
      this.curBlob = null;
      await this.continueMultiUpload(leftover);
    }

    debug("Finalizing parts", this.etag);
    this.etag = this.etag.sort((a, b) => a.PartNumber - b.PartNumber);
    const params = {
      Bucket: this.bucketName, // required
      Key: this.filename, // required
      UploadId: this.uploadId, // required
      MultipartUpload: {
        Parts: this.etag
      }
    };

    return this.callS3("completeMultipartUpload", params);
  }

  /**
   * Aborts the multipart upload so S3 discards the parts uploaded so far.
   *
   * @returns {Promise<object|undefined>} The abortMultipartUpload response, or
   *   undefined when there was nothing to abort.
   */
  async abortMultiUpload() {
    // Wait for in-flight work to settle. Failures are ignored on purpose: a
    // failed start or part upload is exactly when an abort is wanted.
    await Promise.resolve(this.initalizedP).catch(() => {});
    await Promise.all(this.uploadPromises.map((p) => p.catch(() => {})));

    // NOTE(review): this guard was added because S3 errored on uploads with no
    // parts; that may have been caused by the wrong API call - confirm whether
    // it can be dropped now.
    if (!this.uploadId || this.partNumber === 0) {
      debug("Nothing to abort");
      return;
    }

    debug("Aborting");
    const params = {
      Bucket: this.bucketName, // required
      Key: this.filename, // required
      UploadId: this.uploadId // required
    };

    try {
      const data = await this.callS3("abortMultipartUpload", params);
      debug("Aborted");
      return data;
    } catch (err) {
      debug("Err had", err);
      throw err;
    }
  }
}

You are charged for incomplete multipart uploads, but you can add a lifecycle policy detailed here

Last updated