标题:求高手帮帮忙,我是刚刚接触C++,看不懂
只看楼主
hyj0616
Rank: 1
等 级:新手上路
帖 子:26
专家分:0
注 册:2016-8-1
结帖率:66.67%
 问题点数:0 回复次数:0 
求高手帮帮忙:我刚刚接触 C++,看不懂下面这段代码。
/*
 * Content-defined chunking (CDC): read everything available from fd, cut it
 * into variable-size blocks and push one feature per block onto features.
 *
 * A block boundary is declared when the hash of the current BLOCK_WIN_SIZE
 * byte window satisfies (hkey % g_block_size) == CHUNK_CDC_R ("normal"
 * chunk), or when the block grows to BLOCK_MAX_SIZE ("abnormal" chunk).
 * The first (BLOCK_MIN_SIZE - BLOCK_WIN_SIZE) bytes of every block are copied
 * without hashing so that no block is shorter than BLOCK_MIN_SIZE.  Whatever
 * is left when the input ends is emitted as one final block.
 *
 * fd        descriptor to read from (the read position is not restored).
 * features  vector that receives one feature_t (built from the MD5 digest
 *           via md5_2_feature) for each block.
 *
 * Returns 0 on success and -1 if read() fails; in the failure case the
 * features pushed so far stay in the vector.
 *
 * NOTE(review): a read that leaves fewer than BLOCK_MIN_SIZE bytes in total
 * is treated as the end of the input (kept from the original logic), so a
 * short read from a pipe or socket would end chunking early.
 */
int file_chunk_cdc(int fd, vector* features) {
    unsigned char buf[BUF_MAX_SIZE] = {0};
    unsigned char block_buf[BLOCK_MAX_SIZE * 2] = {0};
    char win_buf[BLOCK_WIN_SIZE + 1] = {0};
    unsigned char md5_checksum[32 + 1] = {0};
    unsigned char adler_pre_char = 0;   /* byte that just left the hash window */
    /* leading bytes of each block that are copied without being hashed */
    const unsigned int min_prefix = BLOCK_MIN_SIZE - BLOCK_WIN_SIZE;
    unsigned int bpos = 0;              /* unprocessed bytes kept at the start of buf */
    unsigned int rwsize = 0;            /* bytes delivered by the latest read() */
    unsigned int exp_rwsize = BUF_MAX_SIZE;
    unsigned int head, tail;
    unsigned int block_sz = 0, old_block_sz = 0;
    unsigned int hkey = 0;
    unsigned int rest;
    ssize_t nread;
    feature_t f = 0;

    for (;;)
    {
        /*
         * read() returns -1 on error; keep the result signed so the error is
         * not mistaken for a huge byte count.
         */
        nread = read(fd, buf + bpos, exp_rwsize);
        if (nread < 0)
            return -1;
        rwsize = (unsigned int)nread;
        if (rwsize == 0)
            break;

        /* last chunk: too little data left to form a full block */
        if ((rwsize + bpos + block_sz) < BLOCK_MIN_SIZE)
            break;

        head = 0;
        tail = bpos + rwsize;

        /* avoid unnecessary computation and comparison: top up the unhashed prefix */
        if (block_sz < min_prefix)
        {
            old_block_sz = block_sz;
            block_sz += tail - head;
            if (block_sz > min_prefix)
                block_sz = min_prefix;
            memcpy(block_buf + old_block_sz, buf + head, block_sz - old_block_sz);
            head += (block_sz - old_block_sz);
        }

        while ((head + BLOCK_WIN_SIZE) <= tail)
        {
            memcpy(win_buf, buf + head, BLOCK_WIN_SIZE);
            /*
             * Original author's note: rabinhash was tried first but performed
             * badly; ELF_hash was better on both speed and dedup rate and
             * became the default; adler_hash is the default now.
             */
            if (g_rolling_hash)
            {
                /*
                 * The first window of a block is hashed from scratch; after
                 * that the window advances one byte at a time and the
                 * checksum is rolled forward.
                 */
                hkey = (block_sz == min_prefix) ? adler32_checksum(win_buf, BLOCK_WIN_SIZE) :
                    adler32_rolling_checksum(hkey, BLOCK_WIN_SIZE, adler_pre_char, buf[head+BLOCK_WIN_SIZE-1]);
            }
            else
                hkey = g_cdc_chunk_hashfunc(win_buf);

            if ((hkey % g_block_size) == CHUNK_CDC_R)
            {
                /* get a normal chunk: the window closes the block */
                memcpy(block_buf + block_sz, buf + head, BLOCK_WIN_SIZE);
                head += BLOCK_WIN_SIZE;
                block_sz += BLOCK_WIN_SIZE;
                if (block_sz >= BLOCK_MIN_SIZE)
                {
                    md5(block_buf, block_sz, md5_checksum);
                    f = md5_2_feature(md5_checksum);
                    VEC_PUSH_BACK(features, &f);
                    block_sz = 0;
                }
            }
            else
            {
                block_buf[block_sz++] = buf[head++];
                /* get an abnormal chunk: forced cut at the maximum size */
                if (block_sz >= BLOCK_MAX_SIZE)
                {
                    md5(block_buf, block_sz, md5_checksum);
                    f = md5_2_feature(md5_checksum);
                    VEC_PUSH_BACK(features, &f);
                    block_sz = 0;
                }
            }

            /* a block was just emitted: pre-fill the next block's unhashed prefix */
            if (block_sz == 0)
            {
                block_sz = tail - head;
                if (block_sz > min_prefix)
                    block_sz = min_prefix;
                memcpy(block_buf, buf + head, block_sz);
                head += block_sz;
            }

            /* head advanced by at least one byte above, so head - 1 is valid */
            adler_pre_char = buf[head - 1];
        }

        /* move the unprocessed tail to the front and refill the rest of buf */
        bpos = tail - head;
        exp_rwsize = BUF_MAX_SIZE - bpos;
        /*
         * Remember the byte preceding the window before memmove overwrites
         * it.  When nothing was consumed (head == 0) the previous value is
         * still the right one, and buf[head - 1] would be out of bounds.
         */
        if (head > 0)
            adler_pre_char = buf[head - 1];
        memmove(buf, buf + head, bpos);
    }

    /* last chunk: the partial block plus whatever is still sitting in buf */
    rest = rwsize + bpos;
    if ((block_sz + rest) > 0)
    {
        memcpy(block_buf + block_sz, buf, rest);
        md5(block_buf, block_sz + rest, md5_checksum);
        f = md5_2_feature(md5_checksum);
        VEC_PUSH_BACK(features, &f);
    }

    return 0;
}


/*
 * Sliding-block chunking: read everything available from fd and push one
 * feature per block onto features.
 *
 * A window of g_block_size bytes slides over the input one byte at a time.
 * Its Adler-32 checksum (as a string) is looked up in sb_htable_crc; on a
 * hit the window's MD5 is looked up in sb_htable.  If both match, the bytes
 * skipped so far (the "fragment", if any) and the window itself are emitted
 * and the window jumps forward by a whole block.  Otherwise one byte is moved
 * into block_buf; once g_block_size bytes have accumulated there they are
 * emitted as a fixed-size block and their checksum is checked into
 * sb_htable_crc.
 *
 * fd        descriptor to read from; it is rewound to offset 0 before return.
 * features  vector that receives one feature_t (built from the MD5 digest
 *           via md5_2_feature) for each emitted block.
 *
 * Returns 0 on success, -1 if a hash table cannot be created or read() fails.
 *
 * NOTE(review): nothing in this function checks anything into sb_htable, so
 * the "both CRC and MD5 identical" branch looks unreachable here; presumably
 * the check-in happened in the dedup_regfile_block_process() call that used
 * to follow each emission -- confirm before relying on duplicate detection.
 * NOTE(review): bytes left over at end of input (slide_sz bytes in block_buf
 * plus bpos bytes in buf) are not emitted; the original had this "last chunk"
 * handling commented out and that behaviour is kept.
 */
int file_chunk_sb(int fd, vector* features) {
    char buf[BUF_MAX_SIZE] = {0};
    char win_buf[BLOCK_MAX_SIZE * 2] = {0};
    char block_buf[BLOCK_MAX_SIZE * 2] = {0};
    char adler_pre_char = 0;            /* byte that just left the hash window */
    unsigned char md5_checksum[32 + 1] = {0};
    unsigned char md5_checksum1[32 + 1] = {0};
    char crc_checksum[16] = {0};
    unsigned int bpos = 0;              /* unprocessed bytes kept at the start of buf */
    unsigned int slide_sz = 0;          /* bytes accumulated in block_buf */
    unsigned int rwsize = 0, bzsize = 0;
    unsigned int exp_rwsize = BUF_MAX_SIZE;
    unsigned int head, tail;
    unsigned int hkey = 0;
    unsigned int bflag = 0;
    int ret = 0;
    ssize_t nread;

    hashtable* sb_htable = create_hashtable(g_htab_bucket_nr);
    hashtable* sb_htable_crc = create_hashtable(g_htab_bucket_nr);
    if (NULL == sb_htable_crc || sb_htable == NULL)
    {
        /* release whichever table was created so a partial failure does not leak */
        if (sb_htable != NULL)
            hash_free(sb_htable);
        if (sb_htable_crc != NULL)
            hash_free(sb_htable_crc);
        return -1;
    }

    feature_t f, f1;
    for (;;) {
        /*
         * read() returns -1 on error; keep the result signed so the error is
         * not mistaken for a huge byte count.
         */
        nread = read(fd, buf + bpos, exp_rwsize);
        if (nread < 0) {
            ret = -1;
            break;
        }
        if (nread == 0)
            break;
        rwsize = (unsigned int)nread;

        head = 0;
        tail = bpos + rwsize;
        while ((head + g_block_size) <= tail) {
            memcpy(win_buf, buf + head, g_block_size);
            /* hash from scratch right after a block boundary, roll otherwise */
            hkey = (slide_sz == 0) ? adler32_checksum(win_buf, g_block_size) :
                adler32_rolling_checksum(hkey, g_block_size, adler_pre_char, buf[head+g_block_size-1]);

            uint_2_str(hkey, crc_checksum);

            /* bflag: 0, both CRC and MD5 are not identical
                      1, both CRC and MD5 are identical
                      2, CRC is identical and MD5 is not
             */
            bflag = 0;

            /* this block may be a duplicate */
            bzsize = g_block_size;
            if (hash_exist(sb_htable_crc, crc_checksum))
            {
                bflag = 2;
                md5((unsigned char*)win_buf, bzsize, md5_checksum);
                /* build the feature first: md5_2_str rewrites md5_checksum in place */
                f = md5_2_feature(md5_checksum);
                md5_2_str(md5_checksum);
                if (hash_exist(sb_htable, (char*)md5_checksum))
                {
                    /* insert fragment: the bytes skipped before this window */
                    if (slide_sz != 0)
                    {
                        md5((unsigned char*)block_buf, slide_sz, md5_checksum1);
                        f1 = md5_2_feature(md5_checksum1);
                        VEC_PUSH_BACK(features, &f1);
                    }

                    /* insert fixed-size block */
                    VEC_PUSH_BACK(features, &f);

                    head += g_block_size;
                    slide_sz = 0;
                    bflag = 1;
                }
            }

            /* this block is not a duplicate */
            if (bflag != 1)
            {
                block_buf[slide_sz++] = buf[head++];
                if (slide_sz == g_block_size)
                {
                    bzsize = g_block_size;

                    /* calculate checksum and check in */
                    hkey = adler32_checksum(block_buf, bzsize);
                    uint_2_str(hkey, crc_checksum);
                    hash_checkin(sb_htable_crc, crc_checksum);

                    md5((unsigned char*)block_buf, bzsize, md5_checksum);
                    f = md5_2_feature(md5_checksum);
                    VEC_PUSH_BACK(features, &f);

                    slide_sz = 0;
                }
            }

            /* head advanced by at least one byte above, so head - 1 is valid */
            adler_pre_char = buf[head - 1];
        }

        /* move the unprocessed tail to the front and refill the rest of buf */
        bpos = tail - head;
        exp_rwsize = BUF_MAX_SIZE - bpos;
        /*
         * Remember the byte preceding the window before memmove overwrites
         * it.  When fewer than g_block_size bytes were available nothing was
         * consumed (head == 0): the previous value is still the right one and
         * buf[head - 1] would be out of bounds.
         */
        if (head > 0)
            adler_pre_char = buf[head - 1];
        memmove(buf, buf + head, bpos);
    }

    lseek(fd, 0, SEEK_SET);
    hash_free(sb_htable);
    hash_free(sb_htable_crc);
    return ret;
}





搜索更多相关主题的帖子: features 
2016-08-01 20:53



参与讨论请移步原网站贴子:https://bbs.bccn.net/thread-467429-1-1.html




关于我们 | 广告合作 | 编程中国 | 清除Cookies | TOP | 手机版

编程中国 版权所有,并保留所有权利。
Powered by Discuz, Processed in 0.457507 second(s), 8 queries.
Copyright©2004-2024, BCCN.NET, All Rights Reserved