file – 将近400k图像传输到S3的最有效方法

我目前负责将一个站点从当前服务器迁移到EC2。项目的一部分已经顺利完成,另一部分则让我很头疼:该站点目前有近40万张图像,全部按不同的子文件夹存放在一个主userimg文件夹中,客户希望把所有这些图像都存储到S3上。我的主要问题是:如何将近400,000张图像从服务器传输到S3?我一直在使用 http://s3tools.org/s3cmd,它很好用,但如果用s3cmd传输整个userimg文件夹,将花费将近3天的时间;而且一旦连接中断或出现类似问题,S3上就只会有一部分图像,另一部分则没有,并且无法从中断处继续传输…

有人能建议一个解决方案吗?有没有人遇到过类似的问题?

我建议你写一个简单的Java实用程序(或者让别人写一个):

1. 读取客户端目录的结构(如果需要)。
2. 对于每个图像,在S3上创建相应的键(根据第1步中读取的文件结构),并使用AWS SDK或jets3t API以并行方式启动多部分上传。

我为我们的客户做过这样的工具。它不到200行Java代码,非常可靠。
下面是执行多部分上传的部分;读取文件结构的部分很简单,这里不再列出。

/**
 * Uploads a local file to Amazon S3 using a multi-part upload.
 * <p>
 * The file is read from {@code directoryName + file.separator + keyName} and stored
 * under the key {@code keyName}. {@link #createBucketIfNotExists} is called first so the
 * target bucket is available. The file is sent in parts of at most CHUNK_SIZE bytes, and
 * each part is attempted up to MAX_RETRY times (always at least once).
 * <p>
 * If a part still fails after its last attempt, the multi-part upload is aborted and the
 * failure is propagated. An existing object under the same key is not deleted up front,
 * so it is left untouched when the upload fails.
 * <p>
 * An empty file is stored with a plain {@code putObject} call, because the part loop
 * would produce no parts for it.
 *
 * @param accessKey     - Amazon account access key
 * @param secretKey     - Amazon account secret key
 * @param directoryName - directory path where the file resides
 * @param keyName       - the name of the file to upload; also used as the S3 object key
 * @param bucketName    - the name of the bucket to upload to
 * @throws java.io.FileNotFoundException - if the local path is not an existing regular file
 * @throws java.io.IOException - if a part could not be uploaded within the retry limit
 * @throws Exception    - in case that anything else goes wrong
 */
public void uploadFileToS3(String accessKey
        ,String secretKey
        ,String directoryName
        ,String keyName // that is the file name that will be created after upload completed
        ,String bucketName ) throws Exception {

    String filePath = directoryName
    + System.getProperty("file.separator")
    + keyName;
    File file = new File(filePath);

    // Fail fast, before any remote call: File.length() returns 0 for a missing file,
    // which would otherwise only surface as an obscure failure at completion time.
    if (!file.isFile()) {
        throw new java.io.FileNotFoundException(
                "File to upload does not exist or is not a regular file [" + filePath + "]");
    }

    log.info("uploadFileToS3 is about to upload file [" + filePath + "]");

    // Create a credentials object and service to access S3 account
    AWSCredentials myCredentials =
        new BasicAWSCredentials(accessKey, secretKey);
    AmazonS3 s3Client = new AmazonS3Client(myCredentials);

    // make sure that the bucket exists
    createBucketIfNotExists(bucketName, accessKey, secretKey);

    // NOTE: the existing object (if any) is intentionally NOT deleted before uploading.
    // A successful upload replaces it anyway, and deleting first would lose the old
    // object whenever the upload fails half-way.

    long contentLength = file.length();
    if (contentLength == 0) {
        // The part loop below would upload nothing for an empty file, leaving an empty
        // part list to complete with; store the empty object directly instead.
        s3Client.putObject(bucketName, keyName, file);
        log.info("Finished uploading empty file");
        return;
    }

    // Number of parts, rounded up (the last part may be shorter than CHUNK_SIZE).
    int numOfParts = (int) ((contentLength + CHUNK_SIZE - 1) / CHUNK_SIZE);

    // One PartETag is collected per uploaded part; S3 needs them to assemble the object.
    List<PartETag> partETags = new ArrayList<PartETag>(numOfParts);

    // Step 1: Initialize.
    InitiateMultipartUploadRequest initRequest = new InitiateMultipartUploadRequest(bucketName, keyName);
    InitiateMultipartUploadResult initResponse = s3Client.initiateMultipartUpload(initRequest);
    String uploadId = initResponse.getUploadId();

    try {
        // Step 2: Upload parts.
        long filePosition = 0;
        for (int i = 1; filePosition < contentLength; i++) {
            // Last part can be smaller than CHUNK_SIZE. Adjust part size.
            long partSize = Math.min(CHUNK_SIZE, contentLength - filePosition);

            log.info("Start uploading part[" + i + "] of [" + numOfParts + "]");

            // Create request to upload a part.
            UploadPartRequest uploadRequest = new UploadPartRequest()
            .withBucketName(bucketName).withKey(keyName)
            .withUploadId(uploadId).withPartNumber(i)
            .withFileOffset(filePosition)
            .withFile(file)
            .withPartSize(partSize);

            // Throws once the retry limit is exhausted, so a part is never skipped.
            partETags.add(uploadPartWithRetry(s3Client, uploadRequest, i, numOfParts));

            filePosition += partSize;
            log.info("filePosition=[" + filePosition + "]");
        }
        log.info("Finished uploading file");

        // Step 3: Complete.
        CompleteMultipartUploadRequest compRequest = new
        CompleteMultipartUploadRequest(
                bucketName,
                keyName,
                uploadId,
                partETags);

        s3Client.completeMultipartUpload(compRequest);

        log.info("multipart upload completed.upload id=[" + uploadId + "]");
    } catch (Exception e) {
        log.error("Failed to upload due to Exception:", e);

        // Best-effort cleanup: a failing abort must not hide the original failure.
        try {
            s3Client.abortMultipartUpload(new AbortMultipartUploadRequest(
                    bucketName, keyName, uploadId));
        } catch (Exception abortFailure) {
            log.error("Failed to abort multipart upload id=[" + uploadId + "]", abortFailure);
        }

        throw e;
    }
}

/**
 * Uploads one part, repeating the attempt until it succeeds or the retry limit
 * (CloudUtilsService.MAX_RETRY attempts, but always at least one) is reached.
 *
 * @param s3Client      - client used to send the part
 * @param uploadRequest - fully populated request describing the part
 * @param partNumber    - 1-based part number, used for logging only
 * @param numOfParts    - total number of parts, used for logging only
 * @return the PartETag returned by S3 for the uploaded part
 * @throws java.io.IOException - if the last allowed attempt failed; the cause is that failure
 */
private PartETag uploadPartWithRetry(AmazonS3 s3Client, UploadPartRequest uploadRequest,
        int partNumber, int numOfParts) throws java.io.IOException {
    for (int attempt = 1; ; attempt++) {
        try {
            log.info("Uploading part[" + partNumber + "]");
            PartETag partETag = s3Client.uploadPart(uploadRequest).getPartETag();
            log.info("Finished uploading part[" + partNumber + "] of [" + numOfParts + "]");
            return partETag;
        } catch (Exception e) {
            if (attempt >= CloudUtilsService.MAX_RETRY) {
                // Out of attempts: surface the failure instead of silently moving on.
                throw new java.io.IOException("Failed uploading part[" + partNumber + "] of ["
                        + numOfParts + "] after " + attempt + " attempt(s)", e);
            }
            log.error("Failed uploading part[" + partNumber + "] due to exception. Will retry... Exception: ", e);
        }
    }
}


/**
 * Makes sure the named bucket is available by delegating to JetS3t's
 * {@code getOrCreateBucket}, using a REST S3 service built from the given keys.
 * Any {@link S3ServiceException} is logged and then rethrown unchanged.
 *
 * @param bucketName    - the name of the bucket to retrieve or create
 * @param accessKey     - Amazon account access key
 * @param secretKey     - Amazon account secret key
 * @throws S3ServiceException - if the service cannot be created or the bucket call fails
 */
public void createBucketIfNotExists(String bucketName, String accessKey, String secretKey) throws S3ServiceException {
    final S3Bucket bucket;
    try {
        // JetS3t has its own credentials type, distinct from the AWS SDK one,
        // hence the fully qualified name.
        S3Service s3 = new RestS3Service(
                new org.jets3t.service.security.AWSCredentials(accessKey, secretKey));
        bucket = s3.getOrCreateBucket(bucketName);
    } catch (S3ServiceException failure) {
        log.error("Failed to get or create bucket[" + bucketName + "] due to exception:", failure);
        throw failure;
    }
    log.info("the bucket [" + bucket.getName() + "] was created (if it was not existing yet...)");
}
相关文章
相关标签/搜索